// Source: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/network/ipv4/ipv4.go

// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package ipv4 contains the implementation of the ipv4 network protocol.
package ipv4

import (
	"fmt"
	"math"
	"reflect"
	"sync/atomic"
	"time"

	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/tcpip"
	"github.com/SagerNet/gvisor/pkg/tcpip/buffer"
	"github.com/SagerNet/gvisor/pkg/tcpip/header"
	"github.com/SagerNet/gvisor/pkg/tcpip/header/parse"
	"github.com/SagerNet/gvisor/pkg/tcpip/network/hash"
	"github.com/SagerNet/gvisor/pkg/tcpip/network/internal/fragmentation"
	"github.com/SagerNet/gvisor/pkg/tcpip/network/internal/ip"
	"github.com/SagerNet/gvisor/pkg/tcpip/stack"
)

const (
	// ReassembleTimeout is the time a packet stays in the reassembly
	// system before being evicted.
	// As per RFC 791 section 3.2:
	//   The current recommendation for the initial timer setting is 15 seconds.
	//   This may be changed as experience with this protocol accumulates.
	//
	// Considering that it is an old recommendation, we use the same reassembly
	// timeout that linux defines, which is 30 seconds:
	// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ip.h#L138
	ReassembleTimeout = 30 * time.Second

	// ProtocolNumber is the ipv4 protocol number.
	ProtocolNumber = header.IPv4ProtocolNumber

	// MaxTotalSize is maximum size that can be encoded in the 16-bit
	// TotalLength field of the ipv4 header.
	MaxTotalSize = 0xffff

	// DefaultTTL is the default time-to-live value for this endpoint.
	DefaultTTL = 64

	// buckets is the number of identifier buckets.
	//
	// The per-route hash used to pick an IP ID counter is reduced modulo this
	// value.
	buckets = 2048

	// The size of a fragment block, in bytes, as per RFC 791 section 3.1,
	// page 14.
	fragmentblockSize = 8
)

// Values stored in endpoint.forwarding.
const (
	forwardingDisabled = 0
	forwardingEnabled  = 1
)

// ipv4BroadcastAddr is the IPv4 broadcast address together with its prefix.
// It is added as a permanent address when an endpoint is enabled and removed
// again when the endpoint is disabled.
var ipv4BroadcastAddr = header.IPv4Broadcast.WithPrefix()

// Compile-time assertions that *endpoint implements the stack interfaces it is
// used as.
var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil)
var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil)
var _ stack.GroupAddressableEndpoint = (*endpoint)(nil)
var _ stack.AddressableEndpoint = (*endpoint)(nil)
var _ stack.NetworkEndpoint = (*endpoint)(nil)

// endpoint is the IPv4 network endpoint created by protocol.NewEndpoint for a
// network interface.
type endpoint struct {
	// nic is the interface this endpoint was created for.
	nic stack.NetworkInterface
	// dispatcher receives packets handed up to the transport layer.
	dispatcher stack.TransportDispatcher
	// protocol is the owning IPv4 protocol instance.
	protocol *protocol
	// stats holds the endpoint's IP, ICMP and IGMP counters.
	stats sharedStats

	// enabled is set to 1 when the endpoint is enabled and 0 when it is
	// disabled.
	//
	// Must be accessed using atomic operations.
	enabled uint32

	// forwarding is set to forwardingEnabled when the endpoint has forwarding
	// enabled and forwardingDisabled when it is disabled.
	//
	// Must be accessed using atomic operations.
	forwarding uint32

	// mu groups the lock with the state that methods in this file access
	// while holding it (for example Enable, Disable and SetForwarding).
	mu struct {
		sync.RWMutex

		addressableEndpointState stack.AddressableEndpointState
		igmp                     igmpState
	}
}

// HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint.
106 func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) { 107 // If we are operating as a router, return an ICMP error to the original 108 // packet's sender. 109 if pkt.NetworkPacketInfo.IsForwardedPacket { 110 // TODO(github.com/SagerNet/issue/6005): Propagate asynchronously generated ICMP 111 // errors to local endpoints. 112 e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt) 113 e.stats.ip.Forwarding.Errors.Increment() 114 e.stats.ip.Forwarding.HostUnreachable.Increment() 115 return 116 } 117 // handleControl expects the entire offending packet to be in the packet 118 // buffer's data field. 119 pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{ 120 Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()), 121 }) 122 pkt.NICID = e.nic.ID() 123 pkt.NetworkProtocolNumber = ProtocolNumber 124 // Use the same control type as an ICMPv4 destination host unreachable error 125 // since the host is considered unreachable if we cannot resolve the link 126 // address to the next hop. 127 e.handleControl(&icmpv4DestinationHostUnreachableSockError{}, pkt) 128 } 129 130 // NewEndpoint creates a new ipv4 endpoint. 
131 func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { 132 e := &endpoint{ 133 nic: nic, 134 dispatcher: dispatcher, 135 protocol: p, 136 } 137 e.mu.Lock() 138 e.mu.addressableEndpointState.Init(e) 139 e.mu.igmp.init(e) 140 e.mu.Unlock() 141 142 tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem()) 143 144 stackStats := p.stack.Stats() 145 e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP) 146 e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V4) 147 e.stats.igmp.init(&e.stats.localStats.IGMP, &stackStats.IGMP) 148 149 p.mu.Lock() 150 p.mu.eps[nic.ID()] = e 151 p.mu.Unlock() 152 153 return e 154 } 155 156 func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint { 157 p.mu.RLock() 158 defer p.mu.RUnlock() 159 160 for _, e := range p.mu.eps { 161 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 162 addressEndpoint.DecRef() 163 return e 164 } 165 } 166 167 return nil 168 } 169 170 func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { 171 p.mu.Lock() 172 defer p.mu.Unlock() 173 delete(p.mu.eps, nicID) 174 } 175 176 // Forwarding implements stack.ForwardingNetworkEndpoint. 177 func (e *endpoint) Forwarding() bool { 178 return atomic.LoadUint32(&e.forwarding) == forwardingEnabled 179 } 180 181 // setForwarding sets the forwarding status for the endpoint. 182 // 183 // Returns true if the forwarding status was updated. 184 func (e *endpoint) setForwarding(v bool) bool { 185 forwarding := uint32(forwardingDisabled) 186 if v { 187 forwarding = forwardingEnabled 188 } 189 190 return atomic.SwapUint32(&e.forwarding, forwarding) != forwarding 191 } 192 193 // SetForwarding implements stack.ForwardingNetworkEndpoint. 
194 func (e *endpoint) SetForwarding(forwarding bool) { 195 e.mu.Lock() 196 defer e.mu.Unlock() 197 198 if !e.setForwarding(forwarding) { 199 return 200 } 201 202 if forwarding { 203 // There does not seem to be an RFC requirement for a node to join the all 204 // routers multicast address but 205 // https://www.iana.org/assignments/multicast-addresses/multicast-addresses.xhtml 206 // specifies the address as a group for all routers on a subnet so we join 207 // the group here. 208 if err := e.joinGroupLocked(header.IPv4AllRoutersGroup); err != nil { 209 // joinGroupLocked only returns an error if the group address is not a 210 // valid IPv4 multicast address. 211 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 212 } 213 214 return 215 } 216 217 switch err := e.leaveGroupLocked(header.IPv4AllRoutersGroup).(type) { 218 case nil: 219 case *tcpip.ErrBadLocalAddress: 220 // The endpoint may have already left the multicast group. 221 default: 222 panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", header.IPv4AllRoutersGroup, err)) 223 } 224 } 225 226 // Enable implements stack.NetworkEndpoint. 227 func (e *endpoint) Enable() tcpip.Error { 228 e.mu.Lock() 229 defer e.mu.Unlock() 230 231 // If the NIC is not enabled, the endpoint can't do anything meaningful so 232 // don't enable the endpoint. 233 if !e.nic.Enabled() { 234 return &tcpip.ErrNotPermitted{} 235 } 236 237 // If the endpoint is already enabled, there is nothing for it to do. 238 if !e.setEnabled(true) { 239 return nil 240 } 241 242 // Create an endpoint to receive broadcast packets on this interface. 243 ep, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(ipv4BroadcastAddr, stack.NeverPrimaryEndpoint, stack.AddressConfigStatic, false /* deprecated */) 244 if err != nil { 245 return err 246 } 247 // We have no need for the address endpoint. 
248 ep.DecRef() 249 250 // Groups may have been joined while the endpoint was disabled, or the 251 // endpoint may have left groups from the perspective of IGMP when the 252 // endpoint was disabled. Either way, we need to let routers know to 253 // send us multicast traffic. 254 e.mu.igmp.initializeAll() 255 256 // As per RFC 1122 section 3.3.7, all hosts should join the all-hosts 257 // multicast group. Note, the IANA calls the all-hosts multicast group the 258 // all-systems multicast group. 259 if err := e.joinGroupLocked(header.IPv4AllSystems); err != nil { 260 // joinGroupLocked only returns an error if the group address is not a valid 261 // IPv4 multicast address. 262 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv4AllSystems, err)) 263 } 264 265 return nil 266 } 267 268 // Enabled implements stack.NetworkEndpoint. 269 func (e *endpoint) Enabled() bool { 270 return e.nic.Enabled() && e.isEnabled() 271 } 272 273 // isEnabled returns true if the endpoint is enabled, regardless of the 274 // enabled status of the NIC. 275 func (e *endpoint) isEnabled() bool { 276 return atomic.LoadUint32(&e.enabled) == 1 277 } 278 279 // setEnabled sets the enabled status for the endpoint. 280 // 281 // Returns true if the enabled status was updated. 282 func (e *endpoint) setEnabled(v bool) bool { 283 if v { 284 return atomic.SwapUint32(&e.enabled, 1) == 0 285 } 286 return atomic.SwapUint32(&e.enabled, 0) == 1 287 } 288 289 // Disable implements stack.NetworkEndpoint. 290 func (e *endpoint) Disable() { 291 e.mu.Lock() 292 defer e.mu.Unlock() 293 e.disableLocked() 294 } 295 296 func (e *endpoint) disableLocked() { 297 if !e.isEnabled() { 298 return 299 } 300 301 // The endpoint may have already left the multicast group. 
302 switch err := e.leaveGroupLocked(header.IPv4AllSystems).(type) { 303 case nil, *tcpip.ErrBadLocalAddress: 304 default: 305 panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv4AllSystems, err)) 306 } 307 308 // Leave groups from the perspective of IGMP so that routers know that 309 // we are no longer interested in the group. 310 e.mu.igmp.softLeaveAll() 311 312 // The address may have already been removed. 313 switch err := e.mu.addressableEndpointState.RemovePermanentAddress(ipv4BroadcastAddr.Address); err.(type) { 314 case nil, *tcpip.ErrBadLocalAddress: 315 default: 316 panic(fmt.Sprintf("unexpected error when removing address = %s: %s", ipv4BroadcastAddr.Address, err)) 317 } 318 319 // Reset the IGMP V1 present flag. 320 // 321 // If the node comes back up on the same network, it will re-learn that it 322 // needs to perform IGMPv1. 323 e.mu.igmp.resetV1Present() 324 325 if !e.setEnabled(false) { 326 panic("should have only done work to disable the endpoint if it was enabled") 327 } 328 } 329 330 // DefaultTTL is the default time-to-live value for this endpoint. 331 func (e *endpoint) DefaultTTL() uint8 { 332 return e.protocol.DefaultTTL() 333 } 334 335 // MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the 336 // network layer max header length. 337 func (e *endpoint) MTU() uint32 { 338 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv4MinimumSize) 339 if err != nil { 340 return 0 341 } 342 return networkMTU 343 } 344 345 // MaxHeaderLength returns the maximum length needed by ipv4 headers (and 346 // underlying protocols). 347 func (e *endpoint) MaxHeaderLength() uint16 { 348 return e.nic.MaxHeaderLength() + header.IPv4MaximumHeaderSize 349 } 350 351 // NetworkProtocolNumber implements stack.NetworkEndpoint. 
352 func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { 353 return e.protocol.Number() 354 } 355 356 func (e *endpoint) addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, options header.IPv4OptionsSerializer) tcpip.Error { 357 hdrLen := header.IPv4MinimumSize 358 var optLen int 359 if options != nil { 360 optLen = int(options.Length()) 361 } 362 hdrLen += optLen 363 if hdrLen > header.IPv4MaximumHeaderSize { 364 return &tcpip.ErrMessageTooLong{} 365 } 366 ipH := header.IPv4(pkt.NetworkHeader().Push(hdrLen)) 367 length := pkt.Size() 368 if length > math.MaxUint16 { 369 return &tcpip.ErrMessageTooLong{} 370 } 371 // RFC 6864 section 4.3 mandates uniqueness of ID values for non-atomic 372 // datagrams. Since the DF bit is never being set here, all datagrams 373 // are non-atomic and need an ID. 374 id := atomic.AddUint32(&e.protocol.ids[hashRoute(srcAddr, dstAddr, params.Protocol, e.protocol.hashIV)%buckets], 1) 375 ipH.Encode(&header.IPv4Fields{ 376 TotalLength: uint16(length), 377 ID: uint16(id), 378 TTL: params.TTL, 379 TOS: params.TOS, 380 Protocol: uint8(params.Protocol), 381 SrcAddr: srcAddr, 382 DstAddr: dstAddr, 383 Options: options, 384 }) 385 ipH.SetChecksum(^ipH.CalculateChecksum()) 386 pkt.NetworkProtocolNumber = ProtocolNumber 387 return nil 388 } 389 390 // handleFragments fragments pkt and calls the handler function on each 391 // fragment. It returns the number of fragments handled and the number of 392 // fragments left to be processed. The IP header must already be present in the 393 // original packet. 394 func (e *endpoint) handleFragments(_ *stack.Route, networkMTU uint32, pkt *stack.PacketBuffer, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { 395 // Round the MTU down to align to 8 bytes. 
396 fragmentPayloadSize := networkMTU &^ 7 397 networkHeader := header.IPv4(pkt.NetworkHeader().View()) 398 pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadSize, pkt.AvailableHeaderBytes()+len(networkHeader)) 399 400 var n int 401 for { 402 fragPkt, more := buildNextFragment(&pf, networkHeader) 403 if err := handler(fragPkt); err != nil { 404 return n, pf.RemainingFragmentCount() + 1, err 405 } 406 n++ 407 if !more { 408 return n, pf.RemainingFragmentCount(), nil 409 } 410 } 411 } 412 413 // WritePacket writes a packet to the given destination address and protocol. 414 func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { 415 if err := e.addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* options */); err != nil { 416 return err 417 } 418 419 // iptables filtering. All packets that reach here are locally 420 // generated. 421 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 422 if ok := e.protocol.stack.IPTables().Check(stack.Output, pkt, r, "" /* preroutingAddr */, "" /* inNicName */, outNicName); !ok { 423 // iptables is telling us to drop the packet. 424 e.stats.ip.IPTablesOutputDropped.Increment() 425 return nil 426 } 427 428 // If the packet is manipulated as per NAT Output rules, handle packet 429 // based on destination address and do not send the packet to link 430 // layer. 431 // 432 // We should do this for every packet, rather than only NATted packets, but 433 // removing this check short circuits broadcasts before they are sent out to 434 // other hosts. 435 if pkt.NatDone { 436 netHeader := header.IPv4(pkt.NetworkHeader().View()) 437 if ep := e.protocol.findEndpointWithAddress(netHeader.DestinationAddress()); ep != nil { 438 // Since we rewrote the packet but it is being routed back to us, we 439 // can safely assume the checksum is valid. 
440 ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) 441 return nil 442 } 443 } 444 445 return e.writePacket(r, pkt, false /* headerIncluded */) 446 } 447 448 func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, headerIncluded bool) tcpip.Error { 449 if r.Loop()&stack.PacketLoop != 0 { 450 // If the packet was generated by the stack (not a raw/packet endpoint 451 // where a packet may be written with the header included), then we can 452 // safely assume the checksum is valid. 453 e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */) 454 } 455 if r.Loop()&stack.PacketOut == 0 { 456 return nil 457 } 458 459 // Postrouting NAT can only change the source address, and does not alter the 460 // route or outgoing interface of the packet. 461 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 462 if ok := e.protocol.stack.IPTables().Check(stack.Postrouting, pkt, r, "" /* preroutingAddr */, "" /* inNicName */, outNicName); !ok { 463 // iptables is telling us to drop the packet. 464 e.stats.ip.IPTablesPostroutingDropped.Increment() 465 return nil 466 } 467 468 stats := e.stats.ip 469 470 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size())) 471 if err != nil { 472 stats.OutgoingPacketErrors.Increment() 473 return err 474 } 475 476 if packetMustBeFragmented(pkt, networkMTU) { 477 h := header.IPv4(pkt.NetworkHeader().View()) 478 if h.Flags()&header.IPv4FlagDontFragment != 0 && pkt.NetworkPacketInfo.IsForwardedPacket { 479 // TODO(github.com/SagerNet/issue/5919): Handle error condition in which DontFragment 480 // is set but the packet must be fragmented for the non-forwarding case. 
481 return &tcpip.ErrMessageTooLong{} 482 } 483 sent, remain, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { 484 // TODO(github.com/SagerNet/issue/3884): Evaluate whether we want to send each 485 // fragment one by one using WritePacket() (current strategy) or if we 486 // want to create a PacketBufferList from the fragments and feed it to 487 // WritePackets(). It'll be faster but cost more memory. 488 return e.nic.WritePacket(r, ProtocolNumber, fragPkt) 489 }) 490 stats.PacketsSent.IncrementBy(uint64(sent)) 491 stats.OutgoingPacketErrors.IncrementBy(uint64(remain)) 492 return err 493 } 494 495 if err := e.nic.WritePacket(r, ProtocolNumber, pkt); err != nil { 496 stats.OutgoingPacketErrors.Increment() 497 return err 498 } 499 stats.PacketsSent.Increment() 500 return nil 501 } 502 503 // WritePackets implements stack.NetworkEndpoint. 504 func (e *endpoint) WritePackets(r *stack.Route, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) { 505 if r.Loop()&stack.PacketLoop != 0 { 506 panic("multiple packets in local loop") 507 } 508 if r.Loop()&stack.PacketOut == 0 { 509 return pkts.Len(), nil 510 } 511 512 stats := e.stats.ip 513 514 for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { 515 if err := e.addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* options */); err != nil { 516 return 0, err 517 } 518 519 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size())) 520 if err != nil { 521 stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len())) 522 return 0, err 523 } 524 525 if packetMustBeFragmented(pkt, networkMTU) { 526 // Keep track of the packet that is about to be fragmented so it can be 527 // removed once the fragmentation is done. 528 originalPkt := pkt 529 if _, _, err := e.handleFragments(r, networkMTU, pkt, func(fragPkt *stack.PacketBuffer) tcpip.Error { 530 // Modify the packet list in place with the new fragments. 
531 pkts.InsertAfter(pkt, fragPkt) 532 pkt = fragPkt 533 return nil 534 }); err != nil { 535 panic(fmt.Sprintf("e.handleFragments(_, _, %d, _, _) = %s", networkMTU, err)) 536 } 537 // Remove the packet that was just fragmented and process the rest. 538 pkts.Remove(originalPkt) 539 } 540 } 541 542 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 543 // iptables filtering. All packets that reach here are locally 544 // generated. 545 outputDropped, natPkts := e.protocol.stack.IPTables().CheckPackets(stack.Output, pkts, r, "" /* inNicName */, outNicName) 546 stats.IPTablesOutputDropped.IncrementBy(uint64(len(outputDropped))) 547 for pkt := range outputDropped { 548 pkts.Remove(pkt) 549 } 550 551 // The NAT-ed packets may now be destined for us. 552 locallyDelivered := 0 553 for pkt := range natPkts { 554 ep := e.protocol.findEndpointWithAddress(header.IPv4(pkt.NetworkHeader().View()).DestinationAddress()) 555 if ep == nil { 556 // The NAT-ed packet is still destined for some remote node. 557 continue 558 } 559 560 // Do not send the locally destined packet out the NIC. 561 pkts.Remove(pkt) 562 563 // Deliver the packet locally. 564 ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) 565 locallyDelivered++ 566 567 } 568 569 // We ignore the list of NAT-ed packets here because Postrouting NAT can only 570 // change the source address, and does not alter the route or outgoing 571 // interface of the packet. 572 postroutingDropped, _ := e.protocol.stack.IPTables().CheckPackets(stack.Postrouting, pkts, r, "" /* inNicName */, outNicName) 573 stats.IPTablesPostroutingDropped.IncrementBy(uint64(len(postroutingDropped))) 574 for pkt := range postroutingDropped { 575 pkts.Remove(pkt) 576 } 577 578 // The rest of the packets can be delivered to the NIC as a batch. 
579 pktsLen := pkts.Len() 580 written, err := e.nic.WritePackets(r, pkts, ProtocolNumber) 581 stats.PacketsSent.IncrementBy(uint64(written)) 582 stats.OutgoingPacketErrors.IncrementBy(uint64(pktsLen - written)) 583 584 // Dropped packets aren't errors, so include them in the return value. 585 return locallyDelivered + written + len(outputDropped) + len(postroutingDropped), err 586 } 587 588 // WriteHeaderIncludedPacket implements stack.NetworkEndpoint. 589 func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error { 590 // The packet already has an IP header, but there are a few required 591 // checks. 592 h, ok := pkt.Data().PullUp(header.IPv4MinimumSize) 593 if !ok { 594 return &tcpip.ErrMalformedHeader{} 595 } 596 597 hdrLen := header.IPv4(h).HeaderLength() 598 if hdrLen < header.IPv4MinimumSize { 599 return &tcpip.ErrMalformedHeader{} 600 } 601 602 h, ok = pkt.Data().PullUp(int(hdrLen)) 603 if !ok { 604 return &tcpip.ErrMalformedHeader{} 605 } 606 ipH := header.IPv4(h) 607 608 // Always set the total length. 609 pktSize := pkt.Data().Size() 610 ipH.SetTotalLength(uint16(pktSize)) 611 612 // Set the source address when zero. 613 if ipH.SourceAddress() == header.IPv4Any { 614 ipH.SetSourceAddress(r.LocalAddress()) 615 } 616 617 // Set the packet ID when zero. 618 if ipH.ID() == 0 { 619 // RFC 6864 section 4.3 mandates uniqueness of ID values for 620 // non-atomic datagrams, so assign an ID to all such datagrams 621 // according to the definition given in RFC 6864 section 4. 622 if ipH.Flags()&header.IPv4FlagDontFragment == 0 || ipH.Flags()&header.IPv4FlagMoreFragments != 0 || ipH.FragmentOffset() > 0 { 623 ipH.SetID(uint16(atomic.AddUint32(&e.protocol.ids[hashRoute(r.LocalAddress(), r.RemoteAddress(), 0 /* protocol */, e.protocol.hashIV)%buckets], 1))) 624 } 625 } 626 627 // Always set the checksum. 
628 ipH.SetChecksum(0) 629 ipH.SetChecksum(^ipH.CalculateChecksum()) 630 631 // Populate the packet buffer's network header and don't allow an invalid 632 // packet to be sent. 633 // 634 // Note that parsing only makes sure that the packet is well formed as per the 635 // wire format. We also want to check if the header's fields are valid before 636 // sending the packet. 637 if !parse.IPv4(pkt) || !header.IPv4(pkt.NetworkHeader().View()).IsValid(pktSize) { 638 return &tcpip.ErrMalformedHeader{} 639 } 640 641 return e.writePacket(r, pkt, true /* headerIncluded */) 642 } 643 644 // forwardPacket attempts to forward a packet to its final destination. 645 func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError { 646 h := header.IPv4(pkt.NetworkHeader().View()) 647 648 dstAddr := h.DestinationAddress() 649 // As per RFC 3927 section 7, 650 // 651 // A router MUST NOT forward a packet with an IPv4 Link-Local source or 652 // destination address, irrespective of the router's default route 653 // configuration or routes obtained from dynamic routing protocols. 654 // 655 // A router which receives a packet with an IPv4 Link-Local source or 656 // destination address MUST NOT forward the packet. This prevents 657 // forwarding of packets back onto the network segment from which they 658 // originated, or to any other segment. 659 if header.IsV4LinkLocalUnicastAddress(h.SourceAddress()) { 660 return &ip.ErrLinkLocalSourceAddress{} 661 } 662 if header.IsV4LinkLocalUnicastAddress(dstAddr) || header.IsV4LinkLocalMulticastAddress(dstAddr) { 663 return &ip.ErrLinkLocalDestinationAddress{} 664 } 665 666 ttl := h.TTL() 667 if ttl == 0 { 668 // As per RFC 792 page 6, Time Exceeded Message, 669 // 670 // If the gateway processing a datagram finds the time to live field 671 // is zero it must discard the datagram. The gateway may also notify 672 // the source host via the time exceeded message. 
673 // 674 // We return the original error rather than the result of returning 675 // the ICMP packet because the original error is more relevant to 676 // the caller. 677 _ = e.protocol.returnError(&icmpReasonTTLExceeded{}, pkt) 678 return &ip.ErrTTLExceeded{} 679 } 680 681 if opts := h.Options(); len(opts) != 0 { 682 newOpts, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageForward{}) 683 if optProblem != nil { 684 if optProblem.NeedICMP { 685 _ = e.protocol.returnError(&icmpReasonParamProblem{ 686 pointer: optProblem.Pointer, 687 forwarding: true, 688 }, pkt) 689 } 690 return &ip.ErrParameterProblem{} 691 } 692 copied := copy(opts, newOpts) 693 if copied != len(newOpts) { 694 panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts))) 695 } 696 // Since in forwarding we handle all options, including copying those we 697 // do not recognise, the options region should remain the same size which 698 // simplifies processing. As we MAY receive a packet with a lot of padded 699 // bytes after the "end of options list" byte, make sure we copy 700 // them as the legal padding value (0). 701 for i := copied; i < len(opts); i++ { 702 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 703 opts[i] = byte(header.IPv4OptionListEndType) 704 } 705 } 706 707 stk := e.protocol.stack 708 709 // Check if the destination is owned by the stack. 710 if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil { 711 inNicName := stk.FindNICNameFromID(e.nic.ID()) 712 outNicName := stk.FindNICNameFromID(ep.nic.ID()) 713 if ok := stk.IPTables().Check(stack.Forward, pkt, nil, "" /* preroutingAddr */, inNicName, outNicName); !ok { 714 // iptables is telling us to drop the packet. 715 e.stats.ip.IPTablesForwardDropped.Increment() 716 return nil 717 } 718 719 // The packet originally arrived on e so provide its NIC as the input NIC. 
720 ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 721 return nil 722 } 723 724 r, err := stk.FindRoute(0, "", dstAddr, ProtocolNumber, false /* multicastLoop */) 725 switch err.(type) { 726 case nil: 727 case *tcpip.ErrNoRoute, *tcpip.ErrNetworkUnreachable: 728 // We return the original error rather than the result of returning 729 // the ICMP packet because the original error is more relevant to 730 // the caller. 731 _ = e.protocol.returnError(&icmpReasonNetworkUnreachable{}, pkt) 732 return &ip.ErrNoRoute{} 733 default: 734 return &ip.ErrOther{Err: err} 735 } 736 defer r.Release() 737 738 inNicName := stk.FindNICNameFromID(e.nic.ID()) 739 outNicName := stk.FindNICNameFromID(r.NICID()) 740 if ok := stk.IPTables().Check(stack.Forward, pkt, nil, "" /* preroutingAddr */, inNicName, outNicName); !ok { 741 // iptables is telling us to drop the packet. 742 e.stats.ip.IPTablesForwardDropped.Increment() 743 return nil 744 } 745 746 // We need to do a deep copy of the IP packet because 747 // WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do 748 // not own it. 749 newHdr := header.IPv4(stack.PayloadSince(pkt.NetworkHeader())) 750 751 // As per RFC 791 page 30, Time to Live, 752 // 753 // This field must be decreased at each point that the internet header 754 // is processed to reflect the time spent processing the datagram. 755 // Even if no local information is available on the time actually 756 // spent, the field must be decremented by 1. 
757 newHdr.SetTTL(ttl - 1) 758 759 switch err := r.WriteHeaderIncludedPacket(stack.NewPacketBuffer(stack.PacketBufferOptions{ 760 ReserveHeaderBytes: int(r.MaxHeaderLength()), 761 Data: buffer.View(newHdr).ToVectorisedView(), 762 IsForwardedPacket: true, 763 })); err.(type) { 764 case nil: 765 return nil 766 case *tcpip.ErrMessageTooLong: 767 // As per RFC 792, page 4, Destination Unreachable: 768 // 769 // Another case is when a datagram must be fragmented to be forwarded by a 770 // gateway yet the Don't Fragment flag is on. In this case the gateway must 771 // discard the datagram and may return a destination unreachable message. 772 // 773 // WriteHeaderIncludedPacket checks for the presence of the Don't Fragment bit 774 // while sending the packet and returns this error iff fragmentation is 775 // necessary and the bit is also set. 776 _ = e.protocol.returnError(&icmpReasonFragmentationNeeded{}, pkt) 777 return &ip.ErrMessageTooLong{} 778 default: 779 return &ip.ErrOther{Err: err} 780 } 781 } 782 783 // HandlePacket is called by the link layer when new ipv4 packets arrive for 784 // this endpoint. 
785 func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { 786 stats := e.stats.ip 787 788 stats.PacketsReceived.Increment() 789 790 if !e.isEnabled() { 791 stats.DisabledPacketsReceived.Increment() 792 return 793 } 794 795 h, ok := e.protocol.parseAndValidate(pkt) 796 if !ok { 797 stats.MalformedPacketsReceived.Increment() 798 return 799 } 800 801 if !e.nic.IsLoopback() { 802 if !e.protocol.options.AllowExternalLoopbackTraffic { 803 if header.IsV4LoopbackAddress(h.SourceAddress()) { 804 stats.InvalidSourceAddressesReceived.Increment() 805 return 806 } 807 808 if header.IsV4LoopbackAddress(h.DestinationAddress()) { 809 stats.InvalidDestinationAddressesReceived.Increment() 810 return 811 } 812 } 813 814 if e.protocol.stack.HandleLocal() { 815 addressEndpoint := e.AcquireAssignedAddress(header.IPv4(pkt.NetworkHeader().View()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint) 816 if addressEndpoint != nil { 817 addressEndpoint.DecRef() 818 819 // The source address is one of our own, so we never should have gotten 820 // a packet like this unless HandleLocal is false or our NIC is the 821 // loopback interface. 822 stats.InvalidSourceAddressesReceived.Increment() 823 return 824 } 825 } 826 827 // Loopback traffic skips the prerouting chain. 828 inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 829 if ok := e.protocol.stack.IPTables().Check(stack.Prerouting, pkt, nil, e.MainAddress().Address, inNicName, "" /* outNicName */); !ok { 830 // iptables is telling us to drop the packet. 831 stats.IPTablesPreroutingDropped.Increment() 832 return 833 } 834 } 835 836 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 837 } 838 839 // handleLocalPacket is like HandlePacket except it does not perform the 840 // prerouting iptables hook or check for loopback traffic that originated from 841 // outside of the netstack (i.e. martian loopback packets). 
842 func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) { 843 stats := e.stats.ip 844 stats.PacketsReceived.Increment() 845 846 pkt = pkt.CloneToInbound() 847 pkt.RXTransportChecksumValidated = canSkipRXChecksum 848 849 h, ok := e.protocol.parseAndValidate(pkt) 850 if !ok { 851 stats.MalformedPacketsReceived.Increment() 852 return 853 } 854 855 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 856 } 857 858 func (e *endpoint) handleValidatedPacket(h header.IPv4, pkt *stack.PacketBuffer, inNICName string) { 859 // Raw socket packets are delivered based solely on the transport protocol 860 // number. We only require that the packet be valid IPv4, and that they not 861 // be fragmented. 862 if !h.More() && h.FragmentOffset() == 0 { 863 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 864 } 865 866 pkt.NICID = e.nic.ID() 867 stats := e.stats 868 stats.ip.ValidPacketsReceived.Increment() 869 870 srcAddr := h.SourceAddress() 871 dstAddr := h.DestinationAddress() 872 873 // As per RFC 1122 section 3.2.1.3: 874 // When a host sends any datagram, the IP source address MUST 875 // be one of its own IP addresses (but not a broadcast or 876 // multicast address). 877 if srcAddr == header.IPv4Broadcast || header.IsV4MulticastAddress(srcAddr) { 878 stats.ip.InvalidSourceAddressesReceived.Increment() 879 return 880 } 881 // Make sure the source address is not a subnet-local broadcast address. 882 if addressEndpoint := e.AcquireAssignedAddress(srcAddr, false /* createTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 883 subnet := addressEndpoint.Subnet() 884 addressEndpoint.DecRef() 885 if subnet.IsBroadcast(srcAddr) { 886 stats.ip.InvalidSourceAddressesReceived.Increment() 887 return 888 } 889 } 890 891 // Before we do any processing, note if the packet was received as some 892 // sort of broadcast. The destination address should be an address we own 893 // or a group we joined. 
894 if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil { 895 subnet := addressEndpoint.AddressWithPrefix().Subnet() 896 addressEndpoint.DecRef() 897 pkt.NetworkPacketInfo.LocalAddressBroadcast = subnet.IsBroadcast(dstAddr) || dstAddr == header.IPv4Broadcast 898 } else if !e.IsInGroup(dstAddr) { 899 if !e.Forwarding() { 900 stats.ip.InvalidDestinationAddressesReceived.Increment() 901 return 902 } 903 switch err := e.forwardPacket(pkt); err.(type) { 904 case nil: 905 return 906 case *ip.ErrLinkLocalSourceAddress: 907 stats.ip.Forwarding.LinkLocalSource.Increment() 908 case *ip.ErrLinkLocalDestinationAddress: 909 stats.ip.Forwarding.LinkLocalDestination.Increment() 910 case *ip.ErrTTLExceeded: 911 stats.ip.Forwarding.ExhaustedTTL.Increment() 912 case *ip.ErrNoRoute: 913 stats.ip.Forwarding.Unrouteable.Increment() 914 case *ip.ErrParameterProblem: 915 stats.ip.MalformedPacketsReceived.Increment() 916 case *ip.ErrMessageTooLong: 917 stats.ip.Forwarding.PacketTooBig.Increment() 918 default: 919 panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt)) 920 } 921 stats.ip.Forwarding.Errors.Increment() 922 return 923 } 924 925 // iptables filtering. All packets that reach here are intended for 926 // this machine and will not be forwarded. 927 if ok := e.protocol.stack.IPTables().Check(stack.Input, pkt, nil, "" /* preroutingAddr */, inNICName, "" /* outNicName */); !ok { 928 // iptables is telling us to drop the packet. 929 stats.ip.IPTablesInputDropped.Increment() 930 return 931 } 932 933 if h.More() || h.FragmentOffset() != 0 { 934 if pkt.Data().Size()+pkt.TransportHeader().View().Size() == 0 { 935 // Drop the packet as it's marked as a fragment but has 936 // no payload. 
937 stats.ip.MalformedPacketsReceived.Increment() 938 stats.ip.MalformedFragmentsReceived.Increment() 939 return 940 } 941 if opts := h.Options(); len(opts) != 0 { 942 // If there are options we need to check them before we do assembly 943 // or we could be assembling errant packets. However we do not change the 944 // options as that could lead to double processing later. 945 if _, _, optProblem := e.processIPOptions(pkt, opts, &optionUsageVerify{}); optProblem != nil { 946 if optProblem.NeedICMP { 947 _ = e.protocol.returnError(&icmpReasonParamProblem{ 948 pointer: optProblem.Pointer, 949 }, pkt) 950 e.stats.ip.MalformedPacketsReceived.Increment() 951 } 952 return 953 } 954 } 955 // The packet is a fragment, let's try to reassemble it. 956 start := h.FragmentOffset() 957 // Drop the fragment if the size of the reassembled payload would exceed the 958 // maximum payload size. 959 // 960 // Note that this addition doesn't overflow even on 32bit architecture 961 // because pkt.Data().Size() should not exceed 65535 (the max IP datagram 962 // size). Otherwise the packet would've been rejected as invalid before 963 // reaching here. 964 if int(start)+pkt.Data().Size() > header.IPv4MaximumPayloadSize { 965 stats.ip.MalformedPacketsReceived.Increment() 966 stats.ip.MalformedFragmentsReceived.Increment() 967 return 968 } 969 970 proto := h.Protocol() 971 resPkt, _, ready, err := e.protocol.fragmentation.Process( 972 // As per RFC 791 section 2.3, the identification value is unique 973 // for a source-destination pair and protocol. 
974 fragmentation.FragmentID{ 975 Source: h.SourceAddress(), 976 Destination: h.DestinationAddress(), 977 ID: uint32(h.ID()), 978 Protocol: proto, 979 }, 980 start, 981 start+uint16(pkt.Data().Size())-1, 982 h.More(), 983 proto, 984 pkt, 985 ) 986 if err != nil { 987 stats.ip.MalformedPacketsReceived.Increment() 988 stats.ip.MalformedFragmentsReceived.Increment() 989 return 990 } 991 if !ready { 992 return 993 } 994 pkt = resPkt 995 h = header.IPv4(pkt.NetworkHeader().View()) 996 997 // The reassembler doesn't take care of fixing up the header, so we need 998 // to do it here. 999 h.SetTotalLength(uint16(pkt.Data().Size() + len(h))) 1000 h.SetFlagsFragmentOffset(0, 0) 1001 1002 // Now that the packet is reassembled, it can be sent to raw sockets. 1003 e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt) 1004 } 1005 stats.ip.PacketsDelivered.Increment() 1006 1007 p := h.TransportProtocol() 1008 if p == header.ICMPv4ProtocolNumber { 1009 // TODO(github.com/SagerNet/issues/3810): when we sort out ICMP and transport 1010 // headers, the setting of the transport number here should be 1011 // unnecessary and removed. 1012 pkt.TransportProtocolNumber = p 1013 e.handleICMP(pkt) 1014 return 1015 } 1016 // ICMP handles options itself but do it here for all remaining destinations. 
1017 var hasRouterAlertOption bool 1018 if opts := h.Options(); len(opts) != 0 { 1019 newOpts, processedOpts, optProblem := e.processIPOptions(pkt, opts, &optionUsageReceive{}) 1020 if optProblem != nil { 1021 if optProblem.NeedICMP { 1022 _ = e.protocol.returnError(&icmpReasonParamProblem{ 1023 pointer: optProblem.Pointer, 1024 }, pkt) 1025 stats.ip.MalformedPacketsReceived.Increment() 1026 } 1027 return 1028 } 1029 hasRouterAlertOption = processedOpts.routerAlert 1030 copied := copy(opts, newOpts) 1031 if copied != len(newOpts) { 1032 panic(fmt.Sprintf("copied %d bytes of new options, expected %d bytes", copied, len(newOpts))) 1033 } 1034 for i := copied; i < len(opts); i++ { 1035 // Pad with 0 (EOL). RFC 791 page 23 says "The padding is zero". 1036 opts[i] = byte(header.IPv4OptionListEndType) 1037 } 1038 } 1039 if p == header.IGMPProtocolNumber { 1040 e.mu.Lock() 1041 e.mu.igmp.handleIGMP(pkt, hasRouterAlertOption) 1042 e.mu.Unlock() 1043 return 1044 } 1045 1046 switch res := e.dispatcher.DeliverTransportPacket(p, pkt); res { 1047 case stack.TransportPacketHandled: 1048 case stack.TransportPacketDestinationPortUnreachable: 1049 // As per RFC: 1122 Section 3.2.2.1 A host SHOULD generate Destination 1050 // Unreachable messages with code: 1051 // 3 (Port Unreachable), when the designated transport protocol 1052 // (e.g., UDP) is unable to demultiplex the datagram but has no 1053 // protocol mechanism to inform the sender. 
1054 _ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt) 1055 case stack.TransportPacketProtocolUnreachable: 1056 // As per RFC: 1122 Section 3.2.2.1 1057 // A host SHOULD generate Destination Unreachable messages with code: 1058 // 2 (Protocol Unreachable), when the designated transport protocol 1059 // is not supported 1060 _ = e.protocol.returnError(&icmpReasonProtoUnreachable{}, pkt) 1061 default: 1062 panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res)) 1063 } 1064 } 1065 1066 // Close cleans up resources associated with the endpoint. 1067 func (e *endpoint) Close() { 1068 e.mu.Lock() 1069 e.disableLocked() 1070 e.mu.addressableEndpointState.Cleanup() 1071 e.mu.Unlock() 1072 1073 e.protocol.forgetEndpoint(e.nic.ID()) 1074 } 1075 1076 // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. 1077 func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, peb stack.PrimaryEndpointBehavior, configType stack.AddressConfigType, deprecated bool) (stack.AddressEndpoint, tcpip.Error) { 1078 e.mu.RLock() 1079 defer e.mu.RUnlock() 1080 1081 ep, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, peb, configType, deprecated) 1082 if err == nil { 1083 e.mu.igmp.sendQueuedReports() 1084 } 1085 return ep, err 1086 } 1087 1088 // RemovePermanentAddress implements stack.AddressableEndpoint. 1089 func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { 1090 e.mu.RLock() 1091 defer e.mu.RUnlock() 1092 return e.mu.addressableEndpointState.RemovePermanentAddress(addr) 1093 } 1094 1095 // MainAddress implements stack.AddressableEndpoint. 1096 func (e *endpoint) MainAddress() tcpip.AddressWithPrefix { 1097 e.mu.RLock() 1098 defer e.mu.RUnlock() 1099 return e.mu.addressableEndpointState.MainAddress() 1100 } 1101 1102 // AcquireAssignedAddress implements stack.AddressableEndpoint. 
1103 func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint { 1104 e.mu.RLock() 1105 defer e.mu.RUnlock() 1106 1107 loopback := e.nic.IsLoopback() 1108 return e.mu.addressableEndpointState.AcquireAssignedAddressOrMatching(localAddr, func(addressEndpoint stack.AddressEndpoint) bool { 1109 subnet := addressEndpoint.Subnet() 1110 // IPv4 has a notion of a subnet broadcast address and considers the 1111 // loopback interface bound to an address's whole subnet (on linux). 1112 return subnet.IsBroadcast(localAddr) || (loopback && subnet.Contains(localAddr)) 1113 }, allowTemp, tempPEB) 1114 } 1115 1116 // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint. 1117 func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1118 e.mu.RLock() 1119 defer e.mu.RUnlock() 1120 return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired) 1121 } 1122 1123 // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress 1124 // but with locking requirements 1125 // 1126 // Precondition: igmp.ep.mu must be read locked. 1127 func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1128 return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired) 1129 } 1130 1131 // PrimaryAddresses implements stack.AddressableEndpoint. 1132 func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix { 1133 e.mu.RLock() 1134 defer e.mu.RUnlock() 1135 return e.mu.addressableEndpointState.PrimaryAddresses() 1136 } 1137 1138 // PermanentAddresses implements stack.AddressableEndpoint. 
1139 func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { 1140 e.mu.RLock() 1141 defer e.mu.RUnlock() 1142 return e.mu.addressableEndpointState.PermanentAddresses() 1143 } 1144 1145 // JoinGroup implements stack.GroupAddressableEndpoint. 1146 func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { 1147 e.mu.Lock() 1148 defer e.mu.Unlock() 1149 return e.joinGroupLocked(addr) 1150 } 1151 1152 // joinGroupLocked is like JoinGroup but with locking requirements. 1153 // 1154 // Precondition: e.mu must be locked. 1155 func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { 1156 if !header.IsV4MulticastAddress(addr) { 1157 return &tcpip.ErrBadAddress{} 1158 } 1159 1160 e.mu.igmp.joinGroup(addr) 1161 return nil 1162 } 1163 1164 // LeaveGroup implements stack.GroupAddressableEndpoint. 1165 func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { 1166 e.mu.Lock() 1167 defer e.mu.Unlock() 1168 return e.leaveGroupLocked(addr) 1169 } 1170 1171 // leaveGroupLocked is like LeaveGroup but with locking requirements. 1172 // 1173 // Precondition: e.mu must be locked. 1174 func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { 1175 return e.mu.igmp.leaveGroup(addr) 1176 } 1177 1178 // IsInGroup implements stack.GroupAddressableEndpoint. 1179 func (e *endpoint) IsInGroup(addr tcpip.Address) bool { 1180 e.mu.RLock() 1181 defer e.mu.RUnlock() 1182 return e.mu.igmp.isInGroup(addr) 1183 } 1184 1185 // Stats implements stack.NetworkEndpoint. 1186 func (e *endpoint) Stats() stack.NetworkEndpointStats { 1187 return &e.stats.localStats 1188 } 1189 1190 var _ stack.NetworkProtocol = (*protocol)(nil) 1191 var _ fragmentation.TimeoutHandler = (*protocol)(nil) 1192 1193 type protocol struct { 1194 stack *stack.Stack 1195 1196 mu struct { 1197 sync.RWMutex 1198 1199 // eps is keyed by NICID to allow protocol methods to retrieve an endpoint 1200 // when handling a packet, by looking at which NIC handled the packet. 
1201 eps map[tcpip.NICID]*endpoint 1202 } 1203 1204 // defaultTTL is the current default TTL for the protocol. Only the 1205 // uint8 portion of it is meaningful. 1206 // 1207 // Must be accessed using atomic operations. 1208 defaultTTL uint32 1209 1210 ids []uint32 1211 hashIV uint32 1212 1213 fragmentation *fragmentation.Fragmentation 1214 1215 options Options 1216 } 1217 1218 // Number returns the ipv4 protocol number. 1219 func (p *protocol) Number() tcpip.NetworkProtocolNumber { 1220 return ProtocolNumber 1221 } 1222 1223 // MinimumPacketSize returns the minimum valid ipv4 packet size. 1224 func (p *protocol) MinimumPacketSize() int { 1225 return header.IPv4MinimumSize 1226 } 1227 1228 // DefaultPrefixLen returns the IPv4 default prefix length. 1229 func (p *protocol) DefaultPrefixLen() int { 1230 return header.IPv4AddressSize * 8 1231 } 1232 1233 // ParseAddresses implements stack.NetworkProtocol. 1234 func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { 1235 h := header.IPv4(v) 1236 return h.SourceAddress(), h.DestinationAddress() 1237 } 1238 1239 // SetOption implements stack.NetworkProtocol. 1240 func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { 1241 switch v := option.(type) { 1242 case *tcpip.DefaultTTLOption: 1243 p.SetDefaultTTL(uint8(*v)) 1244 return nil 1245 default: 1246 return &tcpip.ErrUnknownProtocolOption{} 1247 } 1248 } 1249 1250 // Option implements stack.NetworkProtocol. 1251 func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { 1252 switch v := option.(type) { 1253 case *tcpip.DefaultTTLOption: 1254 *v = tcpip.DefaultTTLOption(p.DefaultTTL()) 1255 return nil 1256 default: 1257 return &tcpip.ErrUnknownProtocolOption{} 1258 } 1259 } 1260 1261 // SetDefaultTTL sets the default TTL for endpoints created with this protocol. 
1262 func (p *protocol) SetDefaultTTL(ttl uint8) { 1263 atomic.StoreUint32(&p.defaultTTL, uint32(ttl)) 1264 } 1265 1266 // DefaultTTL returns the default TTL for endpoints created with this protocol. 1267 func (p *protocol) DefaultTTL() uint8 { 1268 return uint8(atomic.LoadUint32(&p.defaultTTL)) 1269 } 1270 1271 // Close implements stack.TransportProtocol. 1272 func (*protocol) Close() {} 1273 1274 // Wait implements stack.TransportProtocol. 1275 func (*protocol) Wait() {} 1276 1277 // parseAndValidate parses the packet (including its transport layer header) and 1278 // returns the parsed IP header. 1279 // 1280 // Returns true if the IP header was successfully parsed. 1281 func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (header.IPv4, bool) { 1282 transProtoNum, hasTransportHdr, ok := p.Parse(pkt) 1283 if !ok { 1284 return nil, false 1285 } 1286 1287 h := header.IPv4(pkt.NetworkHeader().View()) 1288 // Do not include the link header's size when calculating the size of the IP 1289 // packet. 1290 if !h.IsValid(pkt.Size() - pkt.LinkHeader().View().Size()) { 1291 return nil, false 1292 } 1293 1294 if !h.IsChecksumValid() { 1295 return nil, false 1296 } 1297 1298 if hasTransportHdr { 1299 switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err { 1300 case stack.ParsedOK: 1301 case stack.UnknownTransportProtocol, stack.TransportLayerParseError: 1302 // The transport layer will handle unknown protocols and transport layer 1303 // parsing errors. 1304 default: 1305 panic(fmt.Sprintf("unexpected error parsing transport header = %d", err)) 1306 } 1307 } 1308 1309 return h, true 1310 } 1311 1312 // Parse implements stack.NetworkProtocol. 
1313 func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) { 1314 if ok := parse.IPv4(pkt); !ok { 1315 return 0, false, false 1316 } 1317 1318 ipHdr := header.IPv4(pkt.NetworkHeader().View()) 1319 return ipHdr.TransportProtocol(), !ipHdr.More() && ipHdr.FragmentOffset() == 0, true 1320 } 1321 1322 // calculateNetworkMTU calculates the network-layer payload MTU based on the 1323 // link-layer payload mtu. 1324 func calculateNetworkMTU(linkMTU, networkHeaderSize uint32) (uint32, tcpip.Error) { 1325 if linkMTU < header.IPv4MinimumMTU { 1326 return 0, &tcpip.ErrInvalidEndpointState{} 1327 } 1328 1329 // As per RFC 791 section 3.1, an IPv4 header cannot exceed 60 bytes in 1330 // length: 1331 // The maximal internet header is 60 octets, and a typical internet header 1332 // is 20 octets, allowing a margin for headers of higher level protocols. 1333 if networkHeaderSize > header.IPv4MaximumHeaderSize { 1334 return 0, &tcpip.ErrMalformedHeader{} 1335 } 1336 1337 networkMTU := linkMTU 1338 if networkMTU > MaxTotalSize { 1339 networkMTU = MaxTotalSize 1340 } 1341 1342 return networkMTU - networkHeaderSize, nil 1343 } 1344 1345 func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32) bool { 1346 payload := pkt.TransportHeader().View().Size() + pkt.Data().Size() 1347 return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU 1348 } 1349 1350 // addressToUint32 translates an IPv4 address into its little endian uint32 1351 // representation. 1352 // 1353 // This function does the same thing as binary.LittleEndian.Uint32 but operates 1354 // on a tcpip.Address (a string) without the need to convert it to a byte slice, 1355 // which would cause an allocation. 
1356 func addressToUint32(addr tcpip.Address) uint32 { 1357 _ = addr[3] // bounds check hint to compiler 1358 return uint32(addr[0]) | uint32(addr[1])<<8 | uint32(addr[2])<<16 | uint32(addr[3])<<24 1359 } 1360 1361 // hashRoute calculates a hash value for the given source/destination pair using 1362 // the addresses, transport protocol number and a 32-bit number to generate the 1363 // hash. 1364 func hashRoute(srcAddr, dstAddr tcpip.Address, protocol tcpip.TransportProtocolNumber, hashIV uint32) uint32 { 1365 a := addressToUint32(srcAddr) 1366 b := addressToUint32(dstAddr) 1367 return hash.Hash3Words(a, b, uint32(protocol), hashIV) 1368 } 1369 1370 // Options holds options to configure a new protocol. 1371 type Options struct { 1372 // IGMP holds options for IGMP. 1373 IGMP IGMPOptions 1374 1375 // AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e. 1376 // martian loopback packets) should be accepted. 1377 AllowExternalLoopbackTraffic bool 1378 } 1379 1380 // NewProtocolWithOptions returns an IPv4 network protocol. 1381 func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory { 1382 ids := make([]uint32, buckets) 1383 1384 // Randomly initialize hashIV and the ids. 1385 r := hash.RandN32(1 + buckets) 1386 for i := range ids { 1387 ids[i] = r[i] 1388 } 1389 hashIV := r[buckets] 1390 1391 return func(s *stack.Stack) stack.NetworkProtocol { 1392 p := &protocol{ 1393 stack: s, 1394 ids: ids, 1395 hashIV: hashIV, 1396 defaultTTL: DefaultTTL, 1397 options: opts, 1398 } 1399 p.fragmentation = fragmentation.NewFragmentation(fragmentblockSize, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p) 1400 p.mu.eps = make(map[tcpip.NICID]*endpoint) 1401 return p 1402 } 1403 } 1404 1405 // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options. 
1406 func NewProtocol(s *stack.Stack) stack.NetworkProtocol { 1407 return NewProtocolWithOptions(Options{})(s) 1408 } 1409 1410 func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeader header.IPv4) (*stack.PacketBuffer, bool) { 1411 fragPkt, offset, copied, more := pf.BuildNextFragment() 1412 fragPkt.NetworkProtocolNumber = ProtocolNumber 1413 1414 originalIPHeaderLength := len(originalIPHeader) 1415 nextFragIPHeader := header.IPv4(fragPkt.NetworkHeader().Push(originalIPHeaderLength)) 1416 fragPkt.NetworkProtocolNumber = ProtocolNumber 1417 1418 if copied := copy(nextFragIPHeader, originalIPHeader); copied != len(originalIPHeader) { 1419 panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got = %d, want = %d", copied, originalIPHeaderLength)) 1420 } 1421 1422 flags := originalIPHeader.Flags() 1423 if more { 1424 flags |= header.IPv4FlagMoreFragments 1425 } 1426 nextFragIPHeader.SetFlagsFragmentOffset(flags, uint16(offset)) 1427 nextFragIPHeader.SetTotalLength(uint16(nextFragIPHeader.HeaderLength()) + uint16(copied)) 1428 nextFragIPHeader.SetChecksum(0) 1429 nextFragIPHeader.SetChecksum(^nextFragIPHeader.CalculateChecksum()) 1430 1431 return fragPkt, more 1432 } 1433 1434 // optionAction describes possible actions that may be taken on an option 1435 // while processing it. 1436 type optionAction uint8 1437 1438 const ( 1439 // optionRemove says that the option should not be in the output option set. 1440 optionRemove optionAction = iota 1441 1442 // optionProcess says that the option should be fully processed. 1443 optionProcess 1444 1445 // optionVerify says the option should be checked and passed unchanged. 1446 optionVerify 1447 1448 // optionPass says to pass the output set without checking. 1449 optionPass 1450 ) 1451 1452 // optionActions list what to do for each option in a given scenario. 1453 type optionActions struct { 1454 // timestamp controls what to do with a Timestamp option. 
1455 timestamp optionAction 1456 1457 // recordRoute controls what to do with a Record Route option. 1458 recordRoute optionAction 1459 1460 // routerAlert controls what to do with a Router Alert option. 1461 routerAlert optionAction 1462 1463 // unknown controls what to do with an unknown option. 1464 unknown optionAction 1465 } 1466 1467 // optionsUsage specifies the ways options may be operated upon for a given 1468 // scenario during packet processing. 1469 type optionsUsage interface { 1470 actions() optionActions 1471 } 1472 1473 // optionUsageVerify implements optionsUsage for when we just want to check 1474 // fragments. Don't change anything, just check and reject if bad. No 1475 // replacement options are generated. 1476 type optionUsageVerify struct{} 1477 1478 // actions implements optionsUsage. 1479 func (*optionUsageVerify) actions() optionActions { 1480 return optionActions{ 1481 timestamp: optionVerify, 1482 recordRoute: optionVerify, 1483 routerAlert: optionVerify, 1484 unknown: optionRemove, 1485 } 1486 } 1487 1488 // optionUsageReceive implements optionsUsage for packets we will pass 1489 // to the transport layer (with the exception of Echo requests). 1490 type optionUsageReceive struct{} 1491 1492 // actions implements optionsUsage. 1493 func (*optionUsageReceive) actions() optionActions { 1494 return optionActions{ 1495 timestamp: optionProcess, 1496 recordRoute: optionProcess, 1497 routerAlert: optionVerify, 1498 unknown: optionPass, 1499 } 1500 } 1501 1502 // optionUsageForward implements optionsUsage for packets about to be forwarded. 1503 // All options are passed on regardless of whether we recognise them, however 1504 // we do process the Timestamp and Record Route options. 1505 type optionUsageForward struct{} 1506 1507 // actions implements optionsUsage. 
1508 func (*optionUsageForward) actions() optionActions { 1509 return optionActions{ 1510 timestamp: optionProcess, 1511 recordRoute: optionProcess, 1512 routerAlert: optionVerify, 1513 unknown: optionPass, 1514 } 1515 } 1516 1517 // optionUsageEcho implements optionsUsage for echo packet processing. 1518 // Only Timestamp and RecordRoute are processed and sent back. 1519 type optionUsageEcho struct{} 1520 1521 // actions implements optionsUsage. 1522 func (*optionUsageEcho) actions() optionActions { 1523 return optionActions{ 1524 timestamp: optionProcess, 1525 recordRoute: optionProcess, 1526 routerAlert: optionVerify, 1527 unknown: optionRemove, 1528 } 1529 } 1530 1531 // handleTimestamp does any required processing on a Timestamp option 1532 // in place. 1533 func handleTimestamp(tsOpt header.IPv4OptionTimestamp, localAddress tcpip.Address, clock tcpip.Clock, usage optionsUsage) *header.IPv4OptParameterProblem { 1534 flags := tsOpt.Flags() 1535 var entrySize uint8 1536 switch flags { 1537 case header.IPv4OptionTimestampOnlyFlag: 1538 entrySize = header.IPv4OptionTimestampSize 1539 case 1540 header.IPv4OptionTimestampWithIPFlag, 1541 header.IPv4OptionTimestampWithPredefinedIPFlag: 1542 entrySize = header.IPv4OptionTimestampWithAddrSize 1543 default: 1544 return &header.IPv4OptParameterProblem{ 1545 Pointer: header.IPv4OptTSOFLWAndFLGOffset, 1546 NeedICMP: true, 1547 } 1548 } 1549 1550 pointer := tsOpt.Pointer() 1551 // RFC 791 page 22 states: "The smallest legal value is 5." 1552 // Since the pointer is 1 based, and the header is 4 bytes long the 1553 // pointer must point beyond the header therefore 4 or less is bad. 1554 if pointer <= header.IPv4OptionTimestampHdrLength { 1555 return &header.IPv4OptParameterProblem{ 1556 Pointer: header.IPv4OptTSPointerOffset, 1557 NeedICMP: true, 1558 } 1559 } 1560 // To simplify processing below, base further work on the array of timestamps 1561 // beyond the header, rather than on the whole option. 
Also to aid 1562 // calculations set 'nextSlot' to be 0 based as in the packet it is 1 based. 1563 nextSlot := pointer - (header.IPv4OptionTimestampHdrLength + 1) 1564 optLen := tsOpt.Size() 1565 dataLength := optLen - header.IPv4OptionTimestampHdrLength 1566 1567 // In the section below, we verify the pointer, length and overflow counter 1568 // fields of the option. The distinction is in which byte you return as being 1569 // in error in the ICMP packet. Offsets 1 (length), 2 pointer) 1570 // or 3 (overflowed counter). 1571 // 1572 // The following RFC sections cover this section: 1573 // 1574 // RFC 791 (page 22): 1575 // If there is some room but not enough room for a full timestamp 1576 // to be inserted, or the overflow count itself overflows, the 1577 // original datagram is considered to be in error and is discarded. 1578 // In either case an ICMP parameter problem message may be sent to 1579 // the source host [3]. 1580 // 1581 // You can get this situation in two ways. Firstly if the data area is not 1582 // a multiple of the entry size or secondly, if the pointer is not at a 1583 // multiple of the entry size. The wording of the RFC suggests that 1584 // this is not an error until you actually run out of space. 1585 if pointer > optLen { 1586 // RFC 791 (page 22) says we should switch to using the overflow count. 1587 // If the timestamp data area is already full (the pointer exceeds 1588 // the length) the datagram is forwarded without inserting the 1589 // timestamp, but the overflow count is incremented by one. 1590 if flags == header.IPv4OptionTimestampWithPredefinedIPFlag { 1591 // By definition we have nothing to do. 1592 return nil 1593 } 1594 1595 if tsOpt.IncOverflow() != 0 { 1596 return nil 1597 } 1598 // The overflow count is also full. 
1599 return &header.IPv4OptParameterProblem{ 1600 Pointer: header.IPv4OptTSOFLWAndFLGOffset, 1601 NeedICMP: true, 1602 } 1603 } 1604 if nextSlot+entrySize > dataLength { 1605 // The data area isn't full but there isn't room for a new entry. 1606 // Either Length or Pointer could be bad. 1607 if false { 1608 // We must select Pointer for Linux compatibility, even if 1609 // only the length is bad. 1610 // The Linux code is at (in October 2020) 1611 // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L367-L370 1612 // if (optptr[2]+3 > optlen) { 1613 // pp_ptr = optptr + 2; 1614 // goto error; 1615 // } 1616 // which doesn't distinguish between which of optptr[2] or optlen 1617 // is wrong, but just arbitrarily decides on optptr+2. 1618 if dataLength%entrySize != 0 { 1619 // The Data section size should be a multiple of the expected 1620 // timestamp entry size. 1621 return &header.IPv4OptParameterProblem{ 1622 Pointer: header.IPv4OptionLengthOffset, 1623 NeedICMP: false, 1624 } 1625 } 1626 // If the size is OK, the pointer must be corrupted. 1627 } 1628 return &header.IPv4OptParameterProblem{ 1629 Pointer: header.IPv4OptTSPointerOffset, 1630 NeedICMP: true, 1631 } 1632 } 1633 1634 if usage.actions().timestamp == optionProcess { 1635 tsOpt.UpdateTimestamp(localAddress, clock) 1636 } 1637 return nil 1638 } 1639 1640 // handleRecordRoute checks and processes a Record route option. It is much 1641 // like the timestamp type 1 option, but without timestamps. The passed in 1642 // address is stored in the option in the correct spot if possible. 
1643 func handleRecordRoute(rrOpt header.IPv4OptionRecordRoute, localAddress tcpip.Address, usage optionsUsage) *header.IPv4OptParameterProblem { 1644 optlen := rrOpt.Size() 1645 1646 if optlen < header.IPv4AddressSize+header.IPv4OptionRecordRouteHdrLength { 1647 return &header.IPv4OptParameterProblem{ 1648 Pointer: header.IPv4OptionLengthOffset, 1649 NeedICMP: true, 1650 } 1651 } 1652 1653 pointer := rrOpt.Pointer() 1654 // RFC 791 page 20 states: 1655 // The pointer is relative to this option, and the 1656 // smallest legal value for the pointer is 4. 1657 // Since the pointer is 1 based, and the header is 3 bytes long the 1658 // pointer must point beyond the header therefore 3 or less is bad. 1659 if pointer <= header.IPv4OptionRecordRouteHdrLength { 1660 return &header.IPv4OptParameterProblem{ 1661 Pointer: header.IPv4OptRRPointerOffset, 1662 NeedICMP: true, 1663 } 1664 } 1665 1666 // RFC 791 page 21 says 1667 // If the route data area is already full (the pointer exceeds the 1668 // length) the datagram is forwarded without inserting the address 1669 // into the recorded route. If there is some room but not enough 1670 // room for a full address to be inserted, the original datagram is 1671 // considered to be in error and is discarded. In either case an 1672 // ICMP parameter problem message may be sent to the source 1673 // host. 1674 // The use of the words "In either case" suggests that a 'full' RR option 1675 // could generate an ICMP at every hop after it fills up. We chose to not 1676 // do this (as do most implementations). It is probable that the inclusion 1677 // of these words is a copy/paste error from the timestamp option where 1678 // there are two failure reasons given. 1679 if pointer > optlen { 1680 return nil 1681 } 1682 1683 // The data area isn't full but there isn't room for a new entry. 1684 // Either Length or Pointer could be bad. We must select Pointer for Linux 1685 // compatibility, even if only the length is bad. NB. 
pointer is 1 based. 1686 if pointer+header.IPv4AddressSize > optlen+1 { 1687 if false { 1688 // This is what we would do if we were not being Linux compatible. 1689 // Check for bad pointer or length value. Must be a multiple of 4 after 1690 // accounting for the 3 byte header and not within that header. 1691 // RFC 791, page 20 says: 1692 // The pointer is relative to this option, and the 1693 // smallest legal value for the pointer is 4. 1694 // 1695 // A recorded route is composed of a series of internet addresses. 1696 // Each internet address is 32 bits or 4 octets. 1697 // Linux skips this test so we must too. See Linux code at: 1698 // https://github.com/torvalds/linux/blob/bbf5c979011a099af5dc76498918ed7df445635b/net/ipv4/ip_options.c#L338-L341 1699 // if (optptr[2]+3 > optlen) { 1700 // pp_ptr = optptr + 2; 1701 // goto error; 1702 // } 1703 if (optlen-header.IPv4OptionRecordRouteHdrLength)%header.IPv4AddressSize != 0 { 1704 // Length is bad, not on integral number of slots. 1705 return &header.IPv4OptParameterProblem{ 1706 Pointer: header.IPv4OptionLengthOffset, 1707 NeedICMP: true, 1708 } 1709 } 1710 // If not length, the fault must be with the pointer. 1711 } 1712 return &header.IPv4OptParameterProblem{ 1713 Pointer: header.IPv4OptRRPointerOffset, 1714 NeedICMP: true, 1715 } 1716 } 1717 if usage.actions().recordRoute == optionVerify { 1718 return nil 1719 } 1720 rrOpt.StoreAddress(localAddress) 1721 return nil 1722 } 1723 1724 // handleRouterAlert performs sanity checks on a Router Alert option. 
1725 func handleRouterAlert(raOpt header.IPv4OptionRouterAlert) *header.IPv4OptParameterProblem { 1726 // Only the zero value is acceptable, as per RFC 2113, section 2.1: 1727 // Value: A two octet code with the following values: 1728 // 0 - Router shall examine packet 1729 // 1-65535 - Reserved 1730 if raOpt.Value() != header.IPv4OptionRouterAlertValue { 1731 return &header.IPv4OptParameterProblem{ 1732 Pointer: header.IPv4OptionRouterAlertValueOffset, 1733 NeedICMP: true, 1734 } 1735 } 1736 return nil 1737 } 1738 1739 type optionTracker struct { 1740 timestamp bool 1741 recordRoute bool 1742 routerAlert bool 1743 } 1744 1745 // processIPOptions parses the IPv4 options and produces a new set of options 1746 // suitable for use in the next step of packet processing as informed by usage. 1747 // The original will not be touched. 1748 // 1749 // If there were no errors during parsing, the new set of options is returned as 1750 // a new buffer. 1751 func (e *endpoint) processIPOptions(pkt *stack.PacketBuffer, opts header.IPv4Options, usage optionsUsage) (header.IPv4Options, optionTracker, *header.IPv4OptParameterProblem) { 1752 stats := e.stats.ip 1753 optIter := opts.MakeIterator() 1754 1755 // Except NOP, each option must only appear at most once (RFC 791 section 3.1, 1756 // at the definition of every type). 1757 // Keep track of each option we find to enable duplicate option detection. 1758 var seenOptions [math.MaxUint8 + 1]bool 1759 1760 // TODO(https://github.com/SagerNet/issue/4586): This will need tweaking when we start 1761 // really forwarding packets as we may need to get two addresses, for rx and 1762 // tx interfaces. We will also have to take usage into account. 
1763 localAddress := e.MainAddress().Address 1764 if len(localAddress) == 0 { 1765 h := header.IPv4(pkt.NetworkHeader().View()) 1766 dstAddr := h.DestinationAddress() 1767 if pkt.NetworkPacketInfo.LocalAddressBroadcast || header.IsV4MulticastAddress(dstAddr) { 1768 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 1769 NeedICMP: false, 1770 } 1771 } 1772 localAddress = dstAddr 1773 } 1774 1775 var optionsProcessed optionTracker 1776 for { 1777 option, done, optProblem := optIter.Next() 1778 if done || optProblem != nil { 1779 return optIter.Finalize(), optionsProcessed, optProblem 1780 } 1781 optType := option.Type() 1782 if optType == header.IPv4OptionNOPType { 1783 optIter.PushNOPOrEnd(optType) 1784 continue 1785 } 1786 if optType == header.IPv4OptionListEndType { 1787 optIter.PushNOPOrEnd(optType) 1788 return optIter.Finalize(), optionsProcessed, nil 1789 } 1790 1791 // check for repeating options (multiple NOPs are OK) 1792 if seenOptions[optType] { 1793 return nil, optionTracker{}, &header.IPv4OptParameterProblem{ 1794 Pointer: optIter.ErrCursor, 1795 NeedICMP: true, 1796 } 1797 } 1798 seenOptions[optType] = true 1799 1800 optLen, optProblem := func() (int, *header.IPv4OptParameterProblem) { 1801 switch option := option.(type) { 1802 case *header.IPv4OptionTimestamp: 1803 stats.OptionTimestampReceived.Increment() 1804 optionsProcessed.timestamp = true 1805 if usage.actions().timestamp != optionRemove { 1806 clock := e.protocol.stack.Clock() 1807 newBuffer := optIter.InitReplacement(option) 1808 optProblem := handleTimestamp(header.IPv4OptionTimestamp(newBuffer), localAddress, clock, usage) 1809 return len(newBuffer), optProblem 1810 } 1811 1812 case *header.IPv4OptionRecordRoute: 1813 stats.OptionRecordRouteReceived.Increment() 1814 optionsProcessed.recordRoute = true 1815 if usage.actions().recordRoute != optionRemove { 1816 newBuffer := optIter.InitReplacement(option) 1817 optProblem := handleRecordRoute(header.IPv4OptionRecordRoute(newBuffer), 
localAddress, usage) 1818 return len(newBuffer), optProblem 1819 } 1820 1821 case *header.IPv4OptionRouterAlert: 1822 stats.OptionRouterAlertReceived.Increment() 1823 optionsProcessed.routerAlert = true 1824 if usage.actions().routerAlert != optionRemove { 1825 newBuffer := optIter.InitReplacement(option) 1826 optProblem := handleRouterAlert(header.IPv4OptionRouterAlert(newBuffer)) 1827 return len(newBuffer), optProblem 1828 } 1829 1830 default: 1831 stats.OptionUnknownReceived.Increment() 1832 if usage.actions().unknown == optionPass { 1833 return len(optIter.InitReplacement(option)), nil 1834 } 1835 } 1836 return 0, nil 1837 }() 1838 1839 if optProblem != nil { 1840 optProblem.Pointer += optIter.ErrCursor 1841 return nil, optionTracker{}, optProblem 1842 } 1843 optIter.ConsumeBuffer(optLen) 1844 } 1845 }