inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/network/ipv6/ipv6.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package ipv6 contains the implementation of the ipv6 network protocol. 16 package ipv6 17 18 import ( 19 "encoding/binary" 20 "fmt" 21 "hash/fnv" 22 "math" 23 "reflect" 24 "sort" 25 "sync/atomic" 26 "time" 27 28 "inet.af/netstack/sync" 29 "inet.af/netstack/tcpip" 30 "inet.af/netstack/tcpip/buffer" 31 "inet.af/netstack/tcpip/header" 32 "inet.af/netstack/tcpip/header/parse" 33 "inet.af/netstack/tcpip/network/hash" 34 "inet.af/netstack/tcpip/network/internal/fragmentation" 35 "inet.af/netstack/tcpip/network/internal/ip" 36 "inet.af/netstack/tcpip/stack" 37 ) 38 39 const ( 40 // ReassembleTimeout controls how long a fragment will be held. 41 // As per RFC 8200 section 4.5: 42 // 43 // If insufficient fragments are received to complete reassembly of a packet 44 // within 60 seconds of the reception of the first-arriving fragment of that 45 // packet, reassembly of that packet must be abandoned. 46 // 47 // Linux also uses 60 seconds for reassembly timeout: 48 // https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/include/net/ipv6.h#L456 49 ReassembleTimeout = 60 * time.Second 50 51 // ProtocolNumber is the ipv6 protocol number. 
52 ProtocolNumber = header.IPv6ProtocolNumber 53 54 // maxPayloadSize is the maximum size that can be encoded in the 16-bit 55 // PayloadLength field of the ipv6 header. 56 maxPayloadSize = 0xffff 57 58 // DefaultTTL is the default hop limit for IPv6 Packets egressed by 59 // Netstack. 60 DefaultTTL = 64 61 62 // buckets for fragment identifiers 63 buckets = 2048 64 ) 65 66 const ( 67 forwardingDisabled = 0 68 forwardingEnabled = 1 69 ) 70 71 // policyTable is the default policy table defined in RFC 6724 section 2.1. 72 // 73 // A more human-readable version: 74 // 75 // Prefix Precedence Label 76 // ::1/128 50 0 77 // ::/0 40 1 78 // ::ffff:0:0/96 35 4 79 // 2002::/16 30 2 80 // 2001::/32 5 5 81 // fc00::/7 3 13 82 // ::/96 1 3 83 // fec0::/10 1 11 84 // 3ffe::/16 1 12 85 // 86 // The table is sorted by prefix length so longest-prefix match can be easily 87 // achieved. 88 // 89 // We willingly left out ::/96, fec0::/10 and 3ffe::/16 since those prefix 90 // assignments are deprecated. 91 // 92 // As per RFC 4291 section 2.5.5.1 (for ::/96), 93 // 94 // The "IPv4-Compatible IPv6 address" is now deprecated because the 95 // current IPv6 transition mechanisms no longer use these addresses. 96 // New or updated implementations are not required to support this 97 // address type. 98 // 99 // As per RFC 3879 section 4 (for fec0::/10), 100 // 101 // This document formally deprecates the IPv6 site-local unicast prefix 102 // defined in [RFC3513], i.e., 1111111011 binary or FEC0::/10. 103 // 104 // As per RFC 3701 section 1 (for 3ffe::/16), 105 // 106 // As clearly stated in [TEST-NEW], the addresses for the 6bone are 107 // temporary and will be reclaimed in the future. It further states 108 // that all users of these addresses (within the 3FFE::/16 prefix) will 109 // be required to renumber at some time in the future. 
110 // 111 // and section 2, 112 // 113 // Thus after the pTLA allocation cutoff date January 1, 2004, it is 114 // REQUIRED that no new 6bone 3FFE pTLAs be allocated. 115 // 116 // MUST NOT BE MODIFIED. 117 var policyTable = [...]struct { 118 subnet tcpip.Subnet 119 120 label uint8 121 }{ 122 // ::1/128 123 { 124 subnet: header.IPv6Loopback.WithPrefix().Subnet(), 125 label: 0, 126 }, 127 // ::ffff:0:0/96 128 { 129 subnet: header.IPv4MappedIPv6Subnet, 130 label: 4, 131 }, 132 // 2001::/32 (Teredo prefix as per RFC 4380 section 2.6). 133 { 134 subnet: tcpip.AddressWithPrefix{ 135 Address: "\x20\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 136 PrefixLen: 32, 137 }.Subnet(), 138 label: 5, 139 }, 140 // 2002::/16 (6to4 prefix as per RFC 3056 section 2). 141 { 142 subnet: tcpip.AddressWithPrefix{ 143 Address: "\x20\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 144 PrefixLen: 16, 145 }.Subnet(), 146 label: 2, 147 }, 148 // fc00::/7 (Unique local addresses as per RFC 4193 section 3.1). 
149 { 150 subnet: tcpip.AddressWithPrefix{ 151 Address: "\xfc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 152 PrefixLen: 7, 153 }.Subnet(), 154 label: 13, 155 }, 156 // ::/0 157 { 158 subnet: header.IPv6EmptySubnet, 159 label: 1, 160 }, 161 } 162 163 func getLabel(addr tcpip.Address) uint8 { 164 for _, p := range policyTable { 165 if p.subnet.Contains(addr) { 166 return p.label 167 } 168 } 169 170 panic(fmt.Sprintf("should have a label for address = %s", addr)) 171 } 172 173 var _ stack.DuplicateAddressDetector = (*endpoint)(nil) 174 var _ stack.LinkAddressResolver = (*endpoint)(nil) 175 var _ stack.LinkResolvableNetworkEndpoint = (*endpoint)(nil) 176 var _ stack.ForwardingNetworkEndpoint = (*endpoint)(nil) 177 var _ stack.GroupAddressableEndpoint = (*endpoint)(nil) 178 var _ stack.AddressableEndpoint = (*endpoint)(nil) 179 var _ stack.NetworkEndpoint = (*endpoint)(nil) 180 var _ stack.NDPEndpoint = (*endpoint)(nil) 181 var _ NDPEndpoint = (*endpoint)(nil) 182 183 type endpoint struct { 184 nic stack.NetworkInterface 185 dispatcher stack.TransportDispatcher 186 protocol *protocol 187 stats sharedStats 188 189 // enabled is set to 1 when the endpoint is enabled and 0 when it is 190 // disabled. 191 // 192 // Must be accessed using atomic operations. 193 enabled uint32 194 195 // forwarding is set to forwardingEnabled when the endpoint has forwarding 196 // enabled and forwardingDisabled when it is disabled. 197 // 198 // Must be accessed using atomic operations. 199 forwarding uint32 200 201 mu struct { 202 sync.RWMutex 203 204 addressableEndpointState stack.AddressableEndpointState 205 ndp ndpState 206 mld mldState 207 } 208 209 // dad is used to check if an arbitrary address is already assigned to some 210 // neighbor. 211 // 212 // Note: this is different from mu.ndp.dad which is used to perform DAD for 213 // addresses that are assigned to the interface. 
Removing an address aborts 214 // DAD; if we had used the same state, handlers for a removed address would 215 // not be called with the actual DAD result. 216 // 217 // LOCK ORDERING: mu > dad.mu. 218 dad struct { 219 mu struct { 220 sync.Mutex 221 222 dad ip.DAD 223 } 224 } 225 } 226 227 // NICNameFromID is a function that returns a stable name for the specified NIC, 228 // even if different NIC IDs are used to refer to the same NIC in different 229 // program runs. It is used when generating opaque interface identifiers (IIDs). 230 // If the NIC was created with a name, it is passed to NICNameFromID. 231 // 232 // NICNameFromID SHOULD return unique NIC names so unique opaque IIDs are 233 // generated for the same prefix on different NICs. 234 type NICNameFromID func(tcpip.NICID, string) string 235 236 // OpaqueInterfaceIdentifierOptions holds the options related to the generation 237 // of opaque interface identifiers (IIDs) as defined by RFC 7217. 238 type OpaqueInterfaceIdentifierOptions struct { 239 // NICNameFromID is a function that returns a stable name for a specified NIC, 240 // even if the NIC ID changes over time. 241 // 242 // Must be specified to generate the opaque IID. 243 NICNameFromID NICNameFromID 244 245 // SecretKey is a pseudo-random number used as the secret key when generating 246 // opaque IIDs as defined by RFC 7217. The key SHOULD be at least 247 // header.OpaqueIIDSecretKeyMinBytes bytes and MUST follow minimum randomness 248 // requirements for security as outlined by RFC 4086. SecretKey MUST NOT 249 // change between program runs, unless explicitly changed. 250 // 251 // OpaqueInterfaceIdentifierOptions takes ownership of SecretKey. SecretKey 252 // MUST NOT be modified after Stack is created. 253 // 254 // May be nil, but a nil value is highly discouraged to maintain 255 // some level of randomness between nodes. 256 SecretKey []byte 257 } 258 259 // CheckDuplicateAddress implements stack.DuplicateAddressDetector. 
260 func (e *endpoint) CheckDuplicateAddress(addr tcpip.Address, h stack.DADCompletionHandler) stack.DADCheckAddressDisposition { 261 e.dad.mu.Lock() 262 defer e.dad.mu.Unlock() 263 return e.dad.mu.dad.CheckDuplicateAddressLocked(addr, h) 264 } 265 266 // SetDADConfigurations implements stack.DuplicateAddressDetector. 267 func (e *endpoint) SetDADConfigurations(c stack.DADConfigurations) { 268 e.mu.Lock() 269 defer e.mu.Unlock() 270 e.dad.mu.Lock() 271 defer e.dad.mu.Unlock() 272 273 e.mu.ndp.dad.SetConfigsLocked(c) 274 e.dad.mu.dad.SetConfigsLocked(c) 275 } 276 277 // DuplicateAddressProtocol implements stack.DuplicateAddressDetector. 278 func (*endpoint) DuplicateAddressProtocol() tcpip.NetworkProtocolNumber { 279 return ProtocolNumber 280 } 281 282 // HandleLinkResolutionFailure implements stack.LinkResolvableNetworkEndpoint. 283 func (e *endpoint) HandleLinkResolutionFailure(pkt *stack.PacketBuffer) { 284 // If we are operating as a router, we should return an ICMP error to the 285 // original packet's sender. 286 if pkt.NetworkPacketInfo.IsForwardedPacket { 287 // TODO(gvisor.dev/issue/6005): Propagate asynchronously generated ICMP 288 // errors to local endpoints. 289 e.protocol.returnError(&icmpReasonHostUnreachable{}, pkt) 290 e.stats.ip.Forwarding.Errors.Increment() 291 e.stats.ip.Forwarding.HostUnreachable.Increment() 292 return 293 } 294 // handleControl expects the entire offending packet to be in the packet 295 // buffer's data field. 296 pkt = stack.NewPacketBuffer(stack.PacketBufferOptions{ 297 Data: buffer.NewVectorisedView(pkt.Size(), pkt.Views()), 298 }) 299 defer pkt.DecRef() 300 pkt.NICID = e.nic.ID() 301 pkt.NetworkProtocolNumber = ProtocolNumber 302 e.handleControl(&icmpv6DestinationAddressUnreachableSockError{}, pkt) 303 } 304 305 // onAddressAssignedLocked handles an address being assigned. 306 // 307 // Precondition: e.mu must be exclusively locked. 
308 func (e *endpoint) onAddressAssignedLocked(addr tcpip.Address) { 309 // As per RFC 2710 section 3, 310 // 311 // All MLD messages described in this document are sent with a link-local 312 // IPv6 Source Address, ... 313 // 314 // If we just completed DAD for a link-local address, then attempt to send any 315 // queued MLD reports. Note, we may have sent reports already for some of the 316 // groups before we had a valid link-local address to use as the source for 317 // the MLD messages, but that was only so that MLD snooping switches are aware 318 // of our membership to groups - routers would not have handled those reports. 319 // 320 // As per RFC 3590 section 4, 321 // 322 // MLD Report and Done messages are sent with a link-local address as 323 // the IPv6 source address, if a valid address is available on the 324 // interface. If a valid link-local address is not available (e.g., one 325 // has not been configured), the message is sent with the unspecified 326 // address (::) as the IPv6 source address. 327 // 328 // Once a valid link-local address is available, a node SHOULD generate 329 // new MLD Report messages for all multicast addresses joined on the 330 // interface. 331 // 332 // Routers receiving an MLD Report or Done message with the unspecified 333 // address as the IPv6 source address MUST silently discard the packet 334 // without taking any action on the packets contents. 335 // 336 // Snooping switches MUST manage multicast forwarding state based on MLD 337 // Report and Done messages sent with the unspecified address as the 338 // IPv6 source address. 339 if header.IsV6LinkLocalUnicastAddress(addr) { 340 e.mu.mld.sendQueuedReports() 341 } 342 } 343 344 // InvalidateDefaultRouter implements stack.NDPEndpoint. 345 func (e *endpoint) InvalidateDefaultRouter(rtr tcpip.Address) { 346 e.mu.Lock() 347 defer e.mu.Unlock() 348 349 // We represent default routers with a default (off-link) route through the 350 // router. 
351 e.mu.ndp.invalidateOffLinkRoute(offLinkRoute{dest: header.IPv6EmptySubnet, router: rtr}) 352 } 353 354 // SetNDPConfigurations implements NDPEndpoint. 355 func (e *endpoint) SetNDPConfigurations(c NDPConfigurations) { 356 c.validate() 357 e.mu.Lock() 358 defer e.mu.Unlock() 359 e.mu.ndp.configs = c 360 } 361 362 // hasTentativeAddr returns true if addr is tentative on e. 363 func (e *endpoint) hasTentativeAddr(addr tcpip.Address) bool { 364 e.mu.RLock() 365 addressEndpoint := e.getAddressRLocked(addr) 366 e.mu.RUnlock() 367 return addressEndpoint != nil && addressEndpoint.GetKind() == stack.PermanentTentative 368 } 369 370 // dupTentativeAddrDetected attempts to inform e that a tentative addr is a 371 // duplicate on a link. 372 // 373 // dupTentativeAddrDetected removes the tentative address if it exists. If the 374 // address was generated via SLAAC, an attempt is made to generate a new 375 // address. 376 func (e *endpoint) dupTentativeAddrDetected(addr tcpip.Address, holderLinkAddr tcpip.LinkAddress, nonce []byte) tcpip.Error { 377 e.mu.Lock() 378 defer e.mu.Unlock() 379 380 addressEndpoint := e.getAddressRLocked(addr) 381 if addressEndpoint == nil { 382 return &tcpip.ErrBadAddress{} 383 } 384 385 if addressEndpoint.GetKind() != stack.PermanentTentative { 386 return &tcpip.ErrInvalidEndpointState{} 387 } 388 389 switch result := e.mu.ndp.dad.ExtendIfNonceEqualLocked(addr, nonce); result { 390 case ip.Extended: 391 // The nonce we got back was the same we sent so we know the message 392 // indicating a duplicate address was likely ours so do not consider 393 // the address duplicate here. 394 return nil 395 case ip.AlreadyExtended: 396 // See Extended. 397 // 398 // Our DAD message was looped back already. 
399 return nil 400 case ip.NoDADStateFound: 401 panic(fmt.Sprintf("expected DAD state for tentative address %s", addr)) 402 case ip.NonceDisabled: 403 // If nonce is disabled then we have no way to know if the packet was 404 // looped-back so we have to assume it indicates a duplicate address. 405 fallthrough 406 case ip.NonceNotEqual: 407 // If the address is a SLAAC address, do not invalidate its SLAAC prefix as an 408 // attempt will be made to generate a new address for it. 409 if err := e.removePermanentEndpointLocked(addressEndpoint, false /* allowSLAACInvalidation */, &stack.DADDupAddrDetected{HolderLinkAddress: holderLinkAddr}); err != nil { 410 return err 411 } 412 413 prefix := addressEndpoint.Subnet() 414 415 switch t := addressEndpoint.ConfigType(); t { 416 case stack.AddressConfigStatic: 417 case stack.AddressConfigSlaac: 418 e.mu.ndp.regenerateSLAACAddr(prefix) 419 case stack.AddressConfigSlaacTemp: 420 // Do not reset the generation attempts counter for the prefix as the 421 // temporary address is being regenerated in response to a DAD conflict. 422 e.mu.ndp.regenerateTempSLAACAddr(prefix, false /* resetGenAttempts */) 423 default: 424 panic(fmt.Sprintf("unrecognized address config type = %d", t)) 425 } 426 427 return nil 428 default: 429 panic(fmt.Sprintf("unhandled result = %d", result)) 430 } 431 } 432 433 // Forwarding implements stack.ForwardingNetworkEndpoint. 434 func (e *endpoint) Forwarding() bool { 435 return atomic.LoadUint32(&e.forwarding) == forwardingEnabled 436 } 437 438 // setForwarding sets the forwarding status for the endpoint. 439 // 440 // Returns true if the forwarding status was updated. 441 func (e *endpoint) setForwarding(v bool) bool { 442 forwarding := uint32(forwardingDisabled) 443 if v { 444 forwarding = forwardingEnabled 445 } 446 447 return atomic.SwapUint32(&e.forwarding, forwarding) != forwarding 448 } 449 450 // SetForwarding implements stack.ForwardingNetworkEndpoint. 
451 func (e *endpoint) SetForwarding(forwarding bool) { 452 e.mu.Lock() 453 defer e.mu.Unlock() 454 455 if !e.setForwarding(forwarding) { 456 return 457 } 458 459 allRoutersGroups := [...]tcpip.Address{ 460 header.IPv6AllRoutersInterfaceLocalMulticastAddress, 461 header.IPv6AllRoutersLinkLocalMulticastAddress, 462 header.IPv6AllRoutersSiteLocalMulticastAddress, 463 } 464 465 if forwarding { 466 // As per RFC 4291 section 2.8: 467 // 468 // A router is required to recognize all addresses that a host is 469 // required to recognize, plus the following addresses as identifying 470 // itself: 471 // 472 // o The All-Routers multicast addresses defined in Section 2.7.1. 473 // 474 // As per RFC 4291 section 2.7.1, 475 // 476 // All Routers Addresses: FF01:0:0:0:0:0:0:2 477 // FF02:0:0:0:0:0:0:2 478 // FF05:0:0:0:0:0:0:2 479 // 480 // The above multicast addresses identify the group of all IPv6 routers, 481 // within scope 1 (interface-local), 2 (link-local), or 5 (site-local). 482 for _, g := range allRoutersGroups { 483 if err := e.joinGroupLocked(g); err != nil { 484 // joinGroupLocked only returns an error if the group address is not a 485 // valid IPv6 multicast address. 486 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", g, err)) 487 } 488 } 489 } else { 490 for _, g := range allRoutersGroups { 491 switch err := e.leaveGroupLocked(g).(type) { 492 case nil: 493 case *tcpip.ErrBadLocalAddress: 494 // The endpoint may have already left the multicast group. 495 default: 496 panic(fmt.Sprintf("e.leaveGroupLocked(%s): %s", g, err)) 497 } 498 } 499 } 500 501 e.mu.ndp.forwardingChanged(forwarding) 502 } 503 504 // Enable implements stack.NetworkEndpoint. 505 func (e *endpoint) Enable() tcpip.Error { 506 e.mu.Lock() 507 defer e.mu.Unlock() 508 509 // If the NIC is not enabled, the endpoint can't do anything meaningful so 510 // don't enable the endpoint. 
511 if !e.nic.Enabled() { 512 return &tcpip.ErrNotPermitted{} 513 } 514 515 // If the endpoint is already enabled, there is nothing for it to do. 516 if !e.setEnabled(true) { 517 return nil 518 } 519 520 // Groups may have been joined when the endpoint was disabled, or the 521 // endpoint may have left groups from the perspective of MLD when the 522 // endpoint was disabled. Either way, we need to let routers know to 523 // send us multicast traffic. 524 e.mu.mld.initializeAll() 525 526 // Join the IPv6 All-Nodes Multicast group if the stack is configured to 527 // use IPv6. This is required to ensure that this node properly receives 528 // and responds to the various NDP messages that are destined to the 529 // all-nodes multicast address. An example is the Neighbor Advertisement 530 // when we perform Duplicate Address Detection, or Router Advertisement 531 // when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861 532 // section 4.2 for more information. 533 // 534 // Also auto-generate an IPv6 link-local address based on the endpoint's 535 // link address if it is configured to do so. Note, each interface is 536 // required to have IPv6 link-local unicast address, as per RFC 4291 537 // section 2.1. 538 539 // Join the All-Nodes multicast group before starting DAD as responses to DAD 540 // (NDP NS) messages may be sent to the All-Nodes multicast group if the 541 // source address of the NDP NS is the unspecified address, as per RFC 4861 542 // section 7.2.4. 543 if err := e.joinGroupLocked(header.IPv6AllNodesMulticastAddress); err != nil { 544 // joinGroupLocked only returns an error if the group address is not a valid 545 // IPv6 multicast address. 546 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", header.IPv6AllNodesMulticastAddress, err)) 547 } 548 549 // Perform DAD on the all the unicast IPv6 endpoints that are in the permanent 550 // state. 
551 // 552 // Addresses may have already completed DAD but in the time since the endpoint 553 // was last enabled, other devices may have acquired the same addresses. 554 var err tcpip.Error 555 e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { 556 addr := addressEndpoint.AddressWithPrefix().Address 557 if !header.IsV6UnicastAddress(addr) { 558 return true 559 } 560 561 switch addressEndpoint.GetKind() { 562 case stack.Permanent: 563 addressEndpoint.SetKind(stack.PermanentTentative) 564 fallthrough 565 case stack.PermanentTentative: 566 err = e.mu.ndp.startDuplicateAddressDetection(addr, addressEndpoint) 567 return err == nil 568 default: 569 return true 570 } 571 }) 572 if err != nil { 573 return err 574 } 575 576 // Do not auto-generate an IPv6 link-local address for loopback devices. 577 if e.protocol.options.AutoGenLinkLocal && !e.nic.IsLoopback() { 578 // The valid and preferred lifetime is infinite for the auto-generated 579 // link-local address. 580 e.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime) 581 } 582 583 e.mu.ndp.startSolicitingRouters() 584 return nil 585 } 586 587 // Enabled implements stack.NetworkEndpoint. 588 func (e *endpoint) Enabled() bool { 589 return e.nic.Enabled() && e.isEnabled() 590 } 591 592 // isEnabled returns true if the endpoint is enabled, regardless of the 593 // enabled status of the NIC. 594 func (e *endpoint) isEnabled() bool { 595 return atomic.LoadUint32(&e.enabled) == 1 596 } 597 598 // setEnabled sets the enabled status for the endpoint. 599 // 600 // Returns true if the enabled status was updated. 601 func (e *endpoint) setEnabled(v bool) bool { 602 if v { 603 return atomic.SwapUint32(&e.enabled, 1) == 0 604 } 605 return atomic.SwapUint32(&e.enabled, 0) == 1 606 } 607 608 // Disable implements stack.NetworkEndpoint. 
609 func (e *endpoint) Disable() { 610 e.mu.Lock() 611 defer e.mu.Unlock() 612 e.disableLocked() 613 } 614 615 func (e *endpoint) disableLocked() { 616 if !e.Enabled() { 617 return 618 } 619 620 e.mu.ndp.stopSolicitingRouters() 621 // Stop DAD for all the tentative unicast addresses. 622 e.mu.addressableEndpointState.ForEachEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { 623 if addressEndpoint.GetKind() != stack.PermanentTentative { 624 return true 625 } 626 627 addr := addressEndpoint.AddressWithPrefix().Address 628 if header.IsV6UnicastAddress(addr) { 629 e.mu.ndp.stopDuplicateAddressDetection(addr, &stack.DADAborted{}) 630 } 631 632 return true 633 }) 634 e.mu.ndp.cleanupState() 635 636 // The endpoint may have already left the multicast group. 637 switch err := e.leaveGroupLocked(header.IPv6AllNodesMulticastAddress).(type) { 638 case nil, *tcpip.ErrBadLocalAddress: 639 default: 640 panic(fmt.Sprintf("unexpected error when leaving group = %s: %s", header.IPv6AllNodesMulticastAddress, err)) 641 } 642 643 // Leave groups from the perspective of MLD so that routers know that 644 // we are no longer interested in the group. 645 e.mu.mld.softLeaveAll() 646 647 if !e.setEnabled(false) { 648 panic("should have only done work to disable the endpoint if it was enabled") 649 } 650 } 651 652 // DefaultTTL is the default hop limit for this endpoint. 653 func (e *endpoint) DefaultTTL() uint8 { 654 return e.protocol.DefaultTTL() 655 } 656 657 // MTU implements stack.NetworkEndpoint. It returns the link-layer MTU minus the 658 // network layer max header length. 659 func (e *endpoint) MTU() uint32 { 660 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), header.IPv6MinimumSize) 661 if err != nil { 662 return 0 663 } 664 return networkMTU 665 } 666 667 // MaxHeaderLength returns the maximum length needed by ipv6 headers (and 668 // underlying protocols). 
669 func (e *endpoint) MaxHeaderLength() uint16 { 670 // TODO(gvisor.dev/issues/5035): The maximum header length returned here does 671 // not open the possibility for the caller to know about size required for 672 // extension headers. 673 return e.nic.MaxHeaderLength() + header.IPv6MinimumSize 674 } 675 676 func addIPHeader(srcAddr, dstAddr tcpip.Address, pkt *stack.PacketBuffer, params stack.NetworkHeaderParams, extensionHeaders header.IPv6ExtHdrSerializer) tcpip.Error { 677 extHdrsLen := extensionHeaders.Length() 678 length := pkt.Size() + extensionHeaders.Length() 679 if length > math.MaxUint16 { 680 return &tcpip.ErrMessageTooLong{} 681 } 682 header.IPv6(pkt.NetworkHeader().Push(header.IPv6MinimumSize + extHdrsLen)).Encode(&header.IPv6Fields{ 683 PayloadLength: uint16(length), 684 TransportProtocol: params.Protocol, 685 HopLimit: params.TTL, 686 TrafficClass: params.TOS, 687 SrcAddr: srcAddr, 688 DstAddr: dstAddr, 689 ExtensionHeaders: extensionHeaders, 690 }) 691 pkt.NetworkProtocolNumber = ProtocolNumber 692 return nil 693 } 694 695 func packetMustBeFragmented(pkt *stack.PacketBuffer, networkMTU uint32) bool { 696 payload := pkt.TransportHeader().View().Size() + pkt.Data().Size() 697 return pkt.GSOOptions.Type == stack.GSONone && uint32(payload) > networkMTU 698 } 699 700 // handleFragments fragments pkt and calls the handler function on each 701 // fragment. It returns the number of fragments handled and the number of 702 // fragments left to be processed. The IP header must already be present in the 703 // original packet. The transport header protocol number is required to avoid 704 // parsing the IPv6 extension headers. 
705 func (e *endpoint) handleFragments(r *stack.Route, networkMTU uint32, pkt *stack.PacketBuffer, transProto tcpip.TransportProtocolNumber, handler func(*stack.PacketBuffer) tcpip.Error) (int, int, tcpip.Error) { 706 networkHeader := header.IPv6(pkt.NetworkHeader().View()) 707 708 // TODO(gvisor.dev/issue/3912): Once the Authentication or ESP Headers are 709 // supported for outbound packets, their length should not affect the fragment 710 // maximum payload length because they should only be transmitted once. 711 fragmentPayloadLen := (networkMTU - header.IPv6FragmentHeaderSize) &^ 7 712 if fragmentPayloadLen < header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit { 713 // We need at least 8 bytes of space left for the fragmentable part because 714 // the fragment payload must obviously be non-zero and must be a multiple 715 // of 8 as per RFC 8200 section 4.5: 716 // Each complete fragment, except possibly the last ("rightmost") one, is 717 // an integer multiple of 8 octets long. 718 return 0, 1, &tcpip.ErrMessageTooLong{} 719 } 720 721 if fragmentPayloadLen < uint32(pkt.TransportHeader().View().Size()) { 722 // As per RFC 8200 Section 4.5, the Transport Header is expected to be small 723 // enough to fit in the first fragment. 724 return 0, 1, &tcpip.ErrMessageTooLong{} 725 } 726 727 pf := fragmentation.MakePacketFragmenter(pkt, fragmentPayloadLen, calculateFragmentReserve(pkt)) 728 id := atomic.AddUint32(&e.protocol.ids[hashRoute(r, e.protocol.hashIV)%buckets], 1) 729 730 var n int 731 for { 732 fragPkt, more := buildNextFragment(&pf, networkHeader, transProto, id) 733 if err := handler(fragPkt); err != nil { 734 return n, pf.RemainingFragmentCount() + 1, err 735 } 736 n++ 737 if !more { 738 return n, pf.RemainingFragmentCount(), nil 739 } 740 } 741 } 742 743 // WritePacket writes a packet to the given destination address and protocol. 
744 func (e *endpoint) WritePacket(r *stack.Route, params stack.NetworkHeaderParams, pkt *stack.PacketBuffer) tcpip.Error { 745 if err := addIPHeader(r.LocalAddress(), r.RemoteAddress(), pkt, params, nil /* extensionHeaders */); err != nil { 746 return err 747 } 748 749 // iptables filtering. All packets that reach here are locally 750 // generated. 751 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 752 if ok := e.protocol.stack.IPTables().CheckOutput(pkt, r, outNicName); !ok { 753 // iptables is telling us to drop the packet. 754 e.stats.ip.IPTablesOutputDropped.Increment() 755 return nil 756 } 757 758 // If the packet is manipulated as per NAT Output rules, handle packet 759 // based on destination address and do not send the packet to link 760 // layer. 761 // 762 // We should do this for every packet, rather than only NATted packets, but 763 // removing this check short circuits broadcasts before they are sent out to 764 // other hosts. 765 if pkt.DNATDone { 766 netHeader := header.IPv6(pkt.NetworkHeader().View()) 767 if ep := e.protocol.findEndpointWithAddress(netHeader.DestinationAddress()); ep != nil { 768 // Since we rewrote the packet but it is being routed back to us, we 769 // can safely assume the checksum is valid. 770 ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */) 771 return nil 772 } 773 } 774 775 return e.writePacket(r, pkt, params.Protocol, false /* headerIncluded */) 776 } 777 778 func (e *endpoint) writePacket(r *stack.Route, pkt *stack.PacketBuffer, protocol tcpip.TransportProtocolNumber, headerIncluded bool) tcpip.Error { 779 if r.Loop()&stack.PacketLoop != 0 { 780 // If the packet was generated by the stack (not a raw/packet endpoint 781 // where a packet may be written with the header included), then we can 782 // safely assume the checksum is valid. 
783 e.handleLocalPacket(pkt, !headerIncluded /* canSkipRXChecksum */) 784 } 785 if r.Loop()&stack.PacketOut == 0 { 786 return nil 787 } 788 789 // Postrouting NAT can only change the source address, and does not alter the 790 // route or outgoing interface of the packet. 791 outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 792 if ok := e.protocol.stack.IPTables().CheckPostrouting(pkt, r, e, outNicName); !ok { 793 // iptables is telling us to drop the packet. 794 e.stats.ip.IPTablesPostroutingDropped.Increment() 795 return nil 796 } 797 798 stats := e.stats.ip 799 networkMTU, err := calculateNetworkMTU(e.nic.MTU(), uint32(pkt.NetworkHeader().View().Size())) 800 if err != nil { 801 stats.OutgoingPacketErrors.Increment() 802 return err 803 } 804 805 if packetMustBeFragmented(pkt, networkMTU) { 806 if pkt.NetworkPacketInfo.IsForwardedPacket { 807 // As per RFC 2460, section 4.5: 808 // Unlike IPv4, fragmentation in IPv6 is performed only by source nodes, 809 // not by routers along a packet's delivery path. 810 return &tcpip.ErrMessageTooLong{} 811 } 812 sent, remain, err := e.handleFragments(r, networkMTU, pkt, protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error { 813 // TODO(gvisor.dev/issue/3884): Evaluate whether we want to send each 814 // fragment one by one using WritePacket() (current strategy) or if we 815 // want to create a PacketBufferList from the fragments and feed it to 816 // WritePackets(). It'll be faster but cost more memory. 817 return e.nic.WritePacket(r, ProtocolNumber, fragPkt) 818 }) 819 stats.PacketsSent.IncrementBy(uint64(sent)) 820 stats.OutgoingPacketErrors.IncrementBy(uint64(remain)) 821 return err 822 } 823 824 if err := e.nic.WritePacket(r, ProtocolNumber, pkt); err != nil { 825 stats.OutgoingPacketErrors.Increment() 826 return err 827 } 828 829 stats.PacketsSent.Increment() 830 return nil 831 } 832 833 // WritePackets implements stack.NetworkEndpoint. 
func (e *endpoint) WritePackets(r *stack.Route, pkts stack.PacketBufferList, params stack.NetworkHeaderParams) (int, tcpip.Error) {
	// Looping packets back into the stack is not supported on the batch path.
	if r.Loop()&stack.PacketLoop != 0 {
		panic("not implemented")
	}
	// Nothing goes out on the wire for this route; every packet is reported as
	// handled.
	if r.Loop()&stack.PacketOut == 0 {
		return pkts.Len(), nil
	}

	stats := e.stats.ip
	linkMTU := e.nic.MTU()
	for pb := pkts.Front(); pb != nil; pb = pb.Next() {
		if err := addIPHeader(r.LocalAddress(), r.RemoteAddress(), pb, params, nil /* extensionHeaders */); err != nil {
			return 0, err
		}

		networkMTU, err := calculateNetworkMTU(linkMTU, uint32(pb.NetworkHeader().View().Size()))
		if err != nil {
			stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
			return 0, err
		}
		if packetMustBeFragmented(pb, networkMTU) {
			// Keep track of the packet that is about to be fragmented so it can be
			// removed once the fragmentation is done.
			originalPkt := pb
			if _, _, err := e.handleFragments(r, networkMTU, pb, params.Protocol, func(fragPkt *stack.PacketBuffer) tcpip.Error {
				fragPkt.IncRef()
				// Modify the packet list in place with the new fragments.
				pkts.InsertAfter(pb, fragPkt)
				// Advance the loop cursor so the next fragment is inserted after this
				// one and the outer loop resumes after the last fragment.
				pb = fragPkt
				return nil
			}); err != nil {
				stats.OutgoingPacketErrors.IncrementBy(uint64(pkts.Len()))
				return 0, err
			}
			// Remove the packet that was just fragmented and process the rest.
			pkts.Remove(originalPkt)
		}
	}

	// iptables filtering. All packets that reach here are locally
	// generated.
	outNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID())
	outputDropped, natPkts := e.protocol.stack.IPTables().CheckOutputPackets(pkts, r, outNicName)
	stats.IPTablesOutputDropped.IncrementBy(uint64(len(outputDropped)))
	for pkt := range outputDropped {
		pkts.Remove(pkt)
	}

	// The NAT-ed packets may now be destined for us.
	locallyDelivered := 0
	for pkt := range natPkts {
		ep := e.protocol.findEndpointWithAddress(header.IPv6(pkt.NetworkHeader().View()).DestinationAddress())
		if ep == nil {
			// The NAT-ed packet is still destined for some remote node.
			continue
		}

		// Do not send the locally destined packet out the NIC.
		pkts.Remove(pkt)

		// Deliver the packet locally.
		ep.handleLocalPacket(pkt, true /* canSkipRXChecksum */)
		locallyDelivered++
	}

	// We ignore the list of NAT-ed packets here because Postrouting NAT can only
	// change the source address, and does not alter the route or outgoing
	// interface of the packet.
	postroutingDropped, _ := e.protocol.stack.IPTables().CheckPostroutingPackets(pkts, r, e, outNicName)
	stats.IPTablesPostroutingDropped.IncrementBy(uint64(len(postroutingDropped)))
	for pkt := range postroutingDropped {
		pkts.Remove(pkt)
	}

	// The rest of the packets can be delivered to the NIC as a batch.
	// The length is captured before the write so that the number of packets the
	// NIC failed to send can be derived from the number written.
	pktsLen := pkts.Len()
	written, err := e.nic.WritePackets(r, pkts, ProtocolNumber)
	stats.PacketsSent.IncrementBy(uint64(written))
	stats.OutgoingPacketErrors.IncrementBy(uint64(pktsLen - written))

	// Dropped packets aren't errors, so include them in the return value.
	return locallyDelivered + written + len(outputDropped) + len(postroutingDropped), err
}

// WriteHeaderIncludedPacket implements stack.NetworkEndpoint.
//
// The caller supplies a packet whose data already starts with an IPv6 header.
// The payload length is always overwritten, the source address is filled in
// from the route when it is unspecified, and the packet is rejected with
// ErrMalformedHeader if it cannot be parsed or its header is not valid.
func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt *stack.PacketBuffer) tcpip.Error {
	// The packet already has an IP header, but there are a few required checks.
	h, ok := pkt.Data().PullUp(header.IPv6MinimumSize)
	if !ok {
		return &tcpip.ErrMalformedHeader{}
	}
	ipH := header.IPv6(h)

	// Always set the payload length.
	pktSize := pkt.Data().Size()
	ipH.SetPayloadLength(uint16(pktSize - header.IPv6MinimumSize))

	// Set the source address when zero.
	if ipH.SourceAddress() == header.IPv6Any {
		ipH.SetSourceAddress(r.LocalAddress())
	}

	// Populate the packet buffer's network header and don't allow an invalid
	// packet to be sent.
	//
	// Note that parsing only makes sure that the packet is well formed as per the
	// wire format. We also want to check if the header's fields are valid before
	// sending the packet.
	proto, _, _, _, ok := parse.IPv6(pkt)
	if !ok || !header.IPv6(pkt.NetworkHeader().View()).IsValid(pktSize) {
		return &tcpip.ErrMalformedHeader{}
	}

	return e.writePacket(r, pkt, proto, true /* headerIncluded */)
}

// forwardPacket attempts to forward a packet to its final destination.
//
// A nil return means the packet needs no further handling by the caller: it
// was written out, delivered to another local endpoint, or dropped by the
// iptables forward hook. A non-nil return identifies why forwarding failed.
func (e *endpoint) forwardPacket(pkt *stack.PacketBuffer) ip.ForwardingError {
	h := header.IPv6(pkt.NetworkHeader().View())

	dstAddr := h.DestinationAddress()
	// As per RFC 4291 section 2.5.6,
	//
	//   Routers must not forward any packets with Link-Local source or
	//   destination addresses to other links.
	if header.IsV6LinkLocalUnicastAddress(h.SourceAddress()) {
		return &ip.ErrLinkLocalSourceAddress{}
	}
	if header.IsV6LinkLocalUnicastAddress(dstAddr) || header.IsV6LinkLocalMulticastAddress(dstAddr) {
		return &ip.ErrLinkLocalDestinationAddress{}
	}

	hopLimit := h.HopLimit()
	if hopLimit <= 1 {
		// As per RFC 4443 section 3.3,
		//
		//   If a router receives a packet with a Hop Limit of zero, or if a
		//   router decrements a packet's Hop Limit to zero, it MUST discard the
		//   packet and originate an ICMPv6 Time Exceeded message with Code 0 to
		//   the source of the packet.  This indicates either a routing loop or
		//   too small an initial Hop Limit value.
		//
		// We return the original error rather than the result of returning
		// the ICMP packet because the original error is more relevant to
		// the caller.
		_ = e.protocol.returnError(&icmpReasonHopLimitExceeded{}, pkt)
		return &ip.ErrTTLExceeded{}
	}

	stk := e.protocol.stack

	// Check if the destination is owned by the stack.
	if ep := e.protocol.findEndpointWithAddress(dstAddr); ep != nil {
		inNicName := stk.FindNICNameFromID(e.nic.ID())
		outNicName := stk.FindNICNameFromID(ep.nic.ID())
		if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok {
			// iptables is telling us to drop the packet.
			e.stats.ip.IPTablesForwardDropped.Increment()
			return nil
		}

		// The packet originally arrived on e so provide its NIC as the input NIC.
		ep.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
		return nil
	}

	// Check extension headers for any errors requiring action during forwarding.
	if err := e.processExtensionHeaders(h, pkt, true /* forwarding */); err != nil {
		return &ip.ErrParameterProblem{}
	}

	r, err := stk.FindRoute(0, "", dstAddr, ProtocolNumber, false /* multicastLoop */)
	switch err.(type) {
	case nil:
	case *tcpip.ErrNoRoute, *tcpip.ErrNetworkUnreachable:
		// We return the original error rather than the result of returning the
		// ICMP packet because the original error is more relevant to the caller.
		_ = e.protocol.returnError(&icmpReasonNetUnreachable{}, pkt)
		return &ip.ErrNoRoute{}
	default:
		return &ip.ErrOther{Err: err}
	}
	defer r.Release()

	inNicName := stk.FindNICNameFromID(e.nic.ID())
	outNicName := stk.FindNICNameFromID(r.NICID())
	if ok := stk.IPTables().CheckForward(pkt, inNicName, outNicName); !ok {
		// iptables is telling us to drop the packet.
		e.stats.ip.IPTablesForwardDropped.Increment()
		return nil
	}

	// We need to do a deep copy of the IP packet because
	// WriteHeaderIncludedPacket takes ownership of the packet buffer, but we do
	// not own it.
	newPkt := pkt.DeepCopyForForwarding(int(r.MaxHeaderLength()))
	defer newPkt.DecRef()
	newHdr := header.IPv6(newPkt.NetworkHeader().View())

	// As per RFC 8200 section 3,
	//
	//   Hop Limit           8-bit unsigned integer. Decremented by 1 by
	//                       each node that forwards the packet.
	newHdr.SetHopLimit(hopLimit - 1)

	forwardToEp, ok := e.protocol.getEndpointForNIC(r.NICID())
	if !ok {
		// The interface was removed after we obtained the route.
		return &ip.ErrOther{Err: &tcpip.ErrUnknownDevice{}}
	}

	switch err := forwardToEp.writePacket(r, newPkt, newPkt.TransportProtocolNumber, true /* headerIncluded */); err.(type) {
	case nil:
		return nil
	case *tcpip.ErrMessageTooLong:
		// As per RFC 4443, section 3.2:
		//   A Packet Too Big MUST be sent by a router in response to a packet that
		//   it cannot forward because the packet is larger than the MTU of the
		//   outgoing link.
		_ = e.protocol.returnError(&icmpReasonPacketTooBig{}, pkt)
		return &ip.ErrMessageTooLong{}
	default:
		return &ip.ErrOther{Err: err}
	}
}

// HandlePacket is called by the link layer when new ipv6 packets arrive for
// this endpoint.
1062 func (e *endpoint) HandlePacket(pkt *stack.PacketBuffer) { 1063 stats := e.stats.ip 1064 1065 stats.PacketsReceived.Increment() 1066 1067 if !e.isEnabled() { 1068 stats.DisabledPacketsReceived.Increment() 1069 return 1070 } 1071 1072 h, ok := e.protocol.parseAndValidate(pkt) 1073 if !ok { 1074 stats.MalformedPacketsReceived.Increment() 1075 return 1076 } 1077 1078 if !e.nic.IsLoopback() { 1079 if !e.protocol.options.AllowExternalLoopbackTraffic { 1080 if header.IsV6LoopbackAddress(h.SourceAddress()) { 1081 stats.InvalidSourceAddressesReceived.Increment() 1082 return 1083 } 1084 1085 if header.IsV6LoopbackAddress(h.DestinationAddress()) { 1086 stats.InvalidDestinationAddressesReceived.Increment() 1087 return 1088 } 1089 } 1090 1091 if e.protocol.stack.HandleLocal() { 1092 addressEndpoint := e.AcquireAssignedAddress(header.IPv6(pkt.NetworkHeader().View()).SourceAddress(), e.nic.Promiscuous(), stack.CanBePrimaryEndpoint) 1093 if addressEndpoint != nil { 1094 addressEndpoint.DecRef() 1095 1096 // The source address is one of our own, so we never should have gotten 1097 // a packet like this unless HandleLocal is false or our NIC is the 1098 // loopback interface. 1099 stats.InvalidSourceAddressesReceived.Increment() 1100 return 1101 } 1102 } 1103 1104 // Loopback traffic skips the prerouting chain. 1105 inNicName := e.protocol.stack.FindNICNameFromID(e.nic.ID()) 1106 if ok := e.protocol.stack.IPTables().CheckPrerouting(pkt, e, inNicName); !ok { 1107 // iptables is telling us to drop the packet. 1108 stats.IPTablesPreroutingDropped.Increment() 1109 return 1110 } 1111 } 1112 1113 e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */) 1114 } 1115 1116 // handleLocalPacket is like HandlePacket except it does not perform the 1117 // prerouting iptables hook or check for loopback traffic that originated from 1118 // outside of the netstack (i.e. martian loopback packets). 
func (e *endpoint) handleLocalPacket(pkt *stack.PacketBuffer, canSkipRXChecksum bool) {
	stats := e.stats.ip
	stats.PacketsReceived.Increment()

	// Work on an inbound clone of the packet; the clone's reference is dropped
	// when this function returns.
	pkt = pkt.CloneToInbound()
	defer pkt.DecRef()
	pkt.RXTransportChecksumValidated = canSkipRXChecksum

	h, ok := e.protocol.parseAndValidate(pkt)
	if !ok {
		stats.MalformedPacketsReceived.Increment()
		return
	}

	e.handleValidatedPacket(h, pkt, e.nic.Name() /* inNICName */)
}

// handleValidatedPacket handles a packet whose IPv6 header h has already been
// parsed and validated.
//
// The packet is first offered to raw sockets. If its destination is neither an
// address assigned to this endpoint nor a group it has joined, the packet is
// forwarded when forwarding is enabled and dropped otherwise. Packets destined
// to this endpoint go through the iptables input hook (evaluated with
// inNICName) and then extension header processing.
func (e *endpoint) handleValidatedPacket(h header.IPv6, pkt *stack.PacketBuffer, inNICName string) {
	pkt.NICID = e.nic.ID()

	// Raw socket packets are delivered based solely on the transport protocol
	// number. We only require that the packet be valid IPv6.
	e.dispatcher.DeliverRawPacket(h.TransportProtocol(), pkt)

	stats := e.stats.ip
	stats.ValidPacketsReceived.Increment()

	srcAddr := h.SourceAddress()
	dstAddr := h.DestinationAddress()

	// As per RFC 4291 section 2.7:
	//   Multicast addresses must not be used as source addresses in IPv6
	//   packets or appear in any Routing header.
	if header.IsV6MulticastAddress(srcAddr) {
		stats.InvalidSourceAddressesReceived.Increment()
		return
	}

	// The destination address should be an address we own or a group we joined
	// for us to receive the packet. Otherwise, attempt to forward the packet.
	if addressEndpoint := e.AcquireAssignedAddress(dstAddr, e.nic.Promiscuous(), stack.CanBePrimaryEndpoint); addressEndpoint != nil {
		// Only the existence of the address matters here, so the reference is
		// released right away.
		addressEndpoint.DecRef()
	} else if !e.IsInGroup(dstAddr) {
		if !e.Forwarding() {
			stats.InvalidDestinationAddressesReceived.Increment()
			return
		}
		// Each known forwarding failure bumps its own counter and then falls out
		// of the switch to the shared Forwarding.Errors counter below.
		switch err := e.forwardPacket(pkt); err.(type) {
		case nil:
			return
		case *ip.ErrLinkLocalSourceAddress:
			e.stats.ip.Forwarding.LinkLocalSource.Increment()
		case *ip.ErrLinkLocalDestinationAddress:
			e.stats.ip.Forwarding.LinkLocalDestination.Increment()
		case *ip.ErrTTLExceeded:
			e.stats.ip.Forwarding.ExhaustedTTL.Increment()
		case *ip.ErrNoRoute:
			e.stats.ip.Forwarding.Unrouteable.Increment()
		case *ip.ErrParameterProblem:
			e.stats.ip.Forwarding.ExtensionHeaderProblem.Increment()
		case *ip.ErrMessageTooLong:
			e.stats.ip.Forwarding.PacketTooBig.Increment()
		default:
			panic(fmt.Sprintf("unexpected error %s while trying to forward packet: %#v", err, pkt))
		}
		e.stats.ip.Forwarding.Errors.Increment()
		return
	}

	// iptables filtering. All packets that reach here are intended for
	// this machine and need not be forwarded.
	if ok := e.protocol.stack.IPTables().CheckInput(pkt, inNICName); !ok {
		// iptables is telling us to drop the packet.
		stats.IPTablesInputDropped.Increment()
		return
	}

	// Any returned error is only useful for terminating execution early, but
	// we have nothing left to do, so we can drop it.
	_ = e.processExtensionHeaders(h, pkt, false /* forwarding */)
}

// processExtensionHeaders processes the extension headers in the given packet.
// Returns an error if the processing of a header failed or if the packet should
// be discarded.
1204 func (e *endpoint) processExtensionHeaders(h header.IPv6, pkt *stack.PacketBuffer, forwarding bool) error { 1205 stats := e.stats.ip 1206 srcAddr := h.SourceAddress() 1207 dstAddr := h.DestinationAddress() 1208 1209 // Create a VV to parse the packet. We don't plan to modify anything here. 1210 // vv consists of: 1211 // - Any IPv6 header bytes after the first 40 (i.e. extensions). 1212 // - The transport header, if present. 1213 // - Any other payload data. 1214 vv := pkt.NetworkHeader().View()[header.IPv6MinimumSize:].ToVectorisedView() 1215 vv.AppendView(pkt.TransportHeader().View()) 1216 vv.AppendViews(pkt.Data().Views()) 1217 it := header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(h.NextHeader()), vv) 1218 1219 var ( 1220 hasFragmentHeader bool 1221 routerAlert *header.IPv6RouterAlertOption 1222 ) 1223 1224 for { 1225 // Keep track of the start of the previous header so we can report the 1226 // special case of a Hop by Hop at a location other than at the start. 1227 previousHeaderStart := it.HeaderOffset() 1228 extHdr, done, err := it.Next() 1229 if err != nil { 1230 stats.MalformedPacketsReceived.Increment() 1231 return err 1232 } 1233 if done { 1234 break 1235 } 1236 1237 // As per RFC 8200, section 4: 1238 // 1239 // Extension headers (except for the Hop-by-Hop Options header) are 1240 // not processed, inserted, or deleted by any node along a packet's 1241 // delivery path until the packet reaches the node identified in the 1242 // Destination Address field of the IPv6 header. 1243 // 1244 // Furthermore, as per RFC 8200 section 4.1, the Hop By Hop extension 1245 // header is restricted to appear first in the list of extension headers. 1246 // 1247 // Therefore, we can immediately return once we hit any header other 1248 // than the Hop-by-Hop header while forwarding a packet. 
1249 if forwarding { 1250 if _, ok := extHdr.(header.IPv6HopByHopOptionsExtHdr); !ok { 1251 return nil 1252 } 1253 } 1254 1255 switch extHdr := extHdr.(type) { 1256 case header.IPv6HopByHopOptionsExtHdr: 1257 // As per RFC 8200 section 4.1, the Hop By Hop extension header is 1258 // restricted to appear immediately after an IPv6 fixed header. 1259 if previousHeaderStart != 0 { 1260 _ = e.protocol.returnError(&icmpReasonParameterProblem{ 1261 code: header.ICMPv6UnknownHeader, 1262 pointer: previousHeaderStart, 1263 forwarding: forwarding, 1264 }, pkt) 1265 return fmt.Errorf("found Hop-by-Hop header = %#v with non-zero previous header offset = %d", extHdr, previousHeaderStart) 1266 } 1267 1268 optsIt := extHdr.Iter() 1269 1270 for { 1271 opt, done, err := optsIt.Next() 1272 if err != nil { 1273 stats.MalformedPacketsReceived.Increment() 1274 return err 1275 } 1276 if done { 1277 break 1278 } 1279 1280 switch opt := opt.(type) { 1281 case *header.IPv6RouterAlertOption: 1282 if routerAlert != nil { 1283 // As per RFC 2711 section 3, there should be at most one Router 1284 // Alert option per packet. 1285 // 1286 // There MUST only be one option of this type, regardless of 1287 // value, per Hop-by-Hop header. 
1288 stats.MalformedPacketsReceived.Increment() 1289 return fmt.Errorf("found multiple Router Alert options (%#v, %#v)", opt, routerAlert) 1290 } 1291 routerAlert = opt 1292 stats.OptionRouterAlertReceived.Increment() 1293 default: 1294 switch opt.UnknownAction() { 1295 case header.IPv6OptionUnknownActionSkip: 1296 case header.IPv6OptionUnknownActionDiscard: 1297 return fmt.Errorf("found unknown Hop-by-Hop header option = %#v with discard action", opt) 1298 case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest: 1299 if header.IsV6MulticastAddress(dstAddr) { 1300 return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt) 1301 } 1302 fallthrough 1303 case header.IPv6OptionUnknownActionDiscardSendICMP: 1304 // This case satisfies a requirement of RFC 8200 section 4.2 which 1305 // states that an unknown option starting with bits [10] should: 1306 // 1307 // discard the packet and, regardless of whether or not the 1308 // packet's Destination Address was a multicast address, send an 1309 // ICMP Parameter Problem, Code 2, message to the packet's 1310 // Source Address, pointing to the unrecognized Option Type. 
1311 _ = e.protocol.returnError(&icmpReasonParameterProblem{ 1312 code: header.ICMPv6UnknownOption, 1313 pointer: it.ParseOffset() + optsIt.OptionOffset(), 1314 respondToMulticast: true, 1315 forwarding: forwarding, 1316 }, pkt) 1317 return fmt.Errorf("found unknown hop-by-hop header option = %#v with discard action", opt) 1318 default: 1319 panic(fmt.Sprintf("unrecognized action for an unrecognized Hop By Hop extension header option = %#v", opt)) 1320 } 1321 } 1322 } 1323 1324 case header.IPv6RoutingExtHdr: 1325 // As per RFC 8200 section 4.4, if a node encounters a routing header with 1326 // an unrecognized routing type value, with a non-zero Segments Left 1327 // value, the node must discard the packet and send an ICMP Parameter 1328 // Problem, Code 0 to the packet's Source Address, pointing to the 1329 // unrecognized Routing Type. 1330 // 1331 // If the Segments Left is 0, the node must ignore the Routing extension 1332 // header and process the next header in the packet. 1333 // 1334 // Note, the stack does not yet handle any type of routing extension 1335 // header, so we just make sure Segments Left is zero before processing 1336 // the next extension header. 1337 if extHdr.SegmentsLeft() != 0 { 1338 _ = e.protocol.returnError(&icmpReasonParameterProblem{ 1339 code: header.ICMPv6ErroneousHeader, 1340 pointer: it.ParseOffset(), 1341 // For the sake of consistency, we're using the value of `forwarding` 1342 // here, even though it should always be false if we've reached this 1343 // point. If `forwarding` is true here, we're executing undefined 1344 // behavior no matter what. 1345 forwarding: forwarding, 1346 }, pkt) 1347 return fmt.Errorf("found unrecognized routing type with non-zero segments left in header = %#v", extHdr) 1348 } 1349 1350 case header.IPv6FragmentExtHdr: 1351 hasFragmentHeader = true 1352 1353 if extHdr.IsAtomic() { 1354 // This fragment extension header indicates that this packet is an 1355 // atomic fragment. 
An atomic fragment is a fragment that contains 1356 // all the data required to reassemble a full packet. As per RFC 6946, 1357 // atomic fragments must not interfere with "normal" fragmented traffic 1358 // so we skip processing the fragment instead of feeding it through the 1359 // reassembly process below. 1360 continue 1361 } 1362 1363 fragmentFieldOffset := it.ParseOffset() 1364 1365 // Don't consume the iterator if we have the first fragment because we 1366 // will use it to validate that the first fragment holds the upper layer 1367 // header. 1368 rawPayload := it.AsRawHeader(extHdr.FragmentOffset() != 0 /* consume */) 1369 1370 if extHdr.FragmentOffset() == 0 { 1371 // Check that the iterator ends with a raw payload as the first fragment 1372 // should include all headers up to and including any upper layer 1373 // headers, as per RFC 8200 section 4.5; only upper layer data 1374 // (non-headers) should follow the fragment extension header. 1375 var lastHdr header.IPv6PayloadHeader 1376 1377 for { 1378 it, done, err := it.Next() 1379 if err != nil { 1380 stats.MalformedPacketsReceived.Increment() 1381 stats.MalformedFragmentsReceived.Increment() 1382 return err 1383 } 1384 if done { 1385 break 1386 } 1387 1388 lastHdr = it 1389 } 1390 1391 // If the last header is a raw header, then the last portion of the IPv6 1392 // payload is not a known IPv6 extension header. Note, this does not 1393 // mean that the last portion is an upper layer header or not an 1394 // extension header because: 1395 // 1) we do not yet support all extension headers 1396 // 2) we do not validate the upper layer header before reassembling. 1397 // 1398 // This check makes sure that a known IPv6 extension header is not 1399 // present after the Fragment extension header in a non-initial 1400 // fragment. 1401 // 1402 // TODO(#2196): Support IPv6 Authentication and Encapsulated 1403 // Security Payload extension headers. 
1404 // TODO(#2333): Validate that the upper layer header is valid. 1405 switch lastHdr.(type) { 1406 case header.IPv6RawPayloadHeader: 1407 default: 1408 stats.MalformedPacketsReceived.Increment() 1409 stats.MalformedFragmentsReceived.Increment() 1410 return fmt.Errorf("known extension header = %#v present after fragment header in a non-initial fragment", lastHdr) 1411 } 1412 } 1413 1414 fragmentPayloadLen := rawPayload.Buf.Size() 1415 if fragmentPayloadLen == 0 { 1416 // Drop the packet as it's marked as a fragment but has no payload. 1417 stats.MalformedPacketsReceived.Increment() 1418 stats.MalformedFragmentsReceived.Increment() 1419 return fmt.Errorf("fragment has no payload") 1420 } 1421 1422 // As per RFC 2460 Section 4.5: 1423 // 1424 // If the length of a fragment, as derived from the fragment packet's 1425 // Payload Length field, is not a multiple of 8 octets and the M flag 1426 // of that fragment is 1, then that fragment must be discarded and an 1427 // ICMP Parameter Problem, Code 0, message should be sent to the source 1428 // of the fragment, pointing to the Payload Length field of the 1429 // fragment packet. 1430 if extHdr.More() && fragmentPayloadLen%header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit != 0 { 1431 stats.MalformedPacketsReceived.Increment() 1432 stats.MalformedFragmentsReceived.Increment() 1433 _ = e.protocol.returnError(&icmpReasonParameterProblem{ 1434 code: header.ICMPv6ErroneousHeader, 1435 pointer: header.IPv6PayloadLenOffset, 1436 }, pkt) 1437 return fmt.Errorf("found fragment length = %d that is not a multiple of 8 octets", fragmentPayloadLen) 1438 } 1439 1440 // The packet is a fragment, let's try to reassemble it. 
1441 start := extHdr.FragmentOffset() * header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit 1442 1443 // As per RFC 2460 Section 4.5: 1444 // 1445 // If the length and offset of a fragment are such that the Payload 1446 // Length of the packet reassembled from that fragment would exceed 1447 // 65,535 octets, then that fragment must be discarded and an ICMP 1448 // Parameter Problem, Code 0, message should be sent to the source of 1449 // the fragment, pointing to the Fragment Offset field of the fragment 1450 // packet. 1451 lengthAfterReassembly := int(start) + fragmentPayloadLen 1452 if lengthAfterReassembly > header.IPv6MaximumPayloadSize { 1453 stats.MalformedPacketsReceived.Increment() 1454 stats.MalformedFragmentsReceived.Increment() 1455 _ = e.protocol.returnError(&icmpReasonParameterProblem{ 1456 code: header.ICMPv6ErroneousHeader, 1457 pointer: fragmentFieldOffset, 1458 }, pkt) 1459 return fmt.Errorf("determined that reassembled packet length = %d would exceed allowed length = %d", lengthAfterReassembly, header.IPv6MaximumPayloadSize) 1460 } 1461 1462 // Note that pkt doesn't have its transport header set after reassembly, 1463 // and won't until DeliverNetworkPacket sets it. 1464 resPkt, proto, ready, err := e.protocol.fragmentation.Process( 1465 // IPv6 ignores the Protocol field since the ID only needs to be unique 1466 // across source-destination pairs, as per RFC 8200 section 4.5. 
1467 fragmentation.FragmentID{ 1468 Source: srcAddr, 1469 Destination: dstAddr, 1470 ID: extHdr.ID(), 1471 }, 1472 start, 1473 start+uint16(fragmentPayloadLen)-1, 1474 extHdr.More(), 1475 uint8(rawPayload.Identifier), 1476 pkt, 1477 ) 1478 if err != nil { 1479 stats.MalformedPacketsReceived.Increment() 1480 stats.MalformedFragmentsReceived.Increment() 1481 return err 1482 } 1483 1484 if ready { 1485 pkt = resPkt 1486 1487 // We create a new iterator with the reassembled packet because we could 1488 // have more extension headers in the reassembled payload, as per RFC 1489 // 8200 section 4.5. We also use the NextHeader value from the first 1490 // fragment. 1491 data := pkt.Data() 1492 dataVV := buffer.NewVectorisedView(data.Size(), data.Views()) 1493 it = header.MakeIPv6PayloadIterator(header.IPv6ExtensionHeaderIdentifier(proto), dataVV) 1494 } 1495 1496 case header.IPv6DestinationOptionsExtHdr: 1497 optsIt := extHdr.Iter() 1498 1499 for { 1500 opt, done, err := optsIt.Next() 1501 if err != nil { 1502 stats.MalformedPacketsReceived.Increment() 1503 return err 1504 } 1505 if done { 1506 break 1507 } 1508 1509 // We currently do not support any IPv6 Destination extension header 1510 // options. 
1511 switch opt.UnknownAction() { 1512 case header.IPv6OptionUnknownActionSkip: 1513 case header.IPv6OptionUnknownActionDiscard: 1514 return fmt.Errorf("found unknown destination header option = %#v with discard action", opt) 1515 case header.IPv6OptionUnknownActionDiscardSendICMPNoMulticastDest: 1516 if header.IsV6MulticastAddress(dstAddr) { 1517 return fmt.Errorf("found unknown destination header option %#v with discard action", opt) 1518 } 1519 fallthrough 1520 case header.IPv6OptionUnknownActionDiscardSendICMP: 1521 // This case satisfies a requirement of RFC 8200 section 4.2 1522 // which states that an unknown option starting with bits [10] should: 1523 // 1524 // discard the packet and, regardless of whether or not the 1525 // packet's Destination Address was a multicast address, send an 1526 // ICMP Parameter Problem, Code 2, message to the packet's 1527 // Source Address, pointing to the unrecognized Option Type. 1528 // 1529 _ = e.protocol.returnError(&icmpReasonParameterProblem{ 1530 code: header.ICMPv6UnknownOption, 1531 pointer: it.ParseOffset() + optsIt.OptionOffset(), 1532 respondToMulticast: true, 1533 }, pkt) 1534 return fmt.Errorf("found unknown destination header option %#v with discard action", opt) 1535 default: 1536 panic(fmt.Sprintf("unrecognized action for an unrecognized Destination extension header option = %#v", opt)) 1537 } 1538 } 1539 1540 case header.IPv6RawPayloadHeader: 1541 // If the last header in the payload isn't a known IPv6 extension header, 1542 // handle it as if it is transport layer data. 1543 1544 // Calculate the number of octets parsed from data. We want to consume all 1545 // the data except the unparsed portion located at the end, whose size is 1546 // extHdr.Buf.Size(). 1547 trim := pkt.Data().Size() - extHdr.Buf.Size() 1548 1549 // For unfragmented packets, extHdr still contains the transport header. 1550 // Consume that too. 
1551 // 1552 // For reassembled fragments, pkt.TransportHeader is unset, so this is a 1553 // no-op and pkt.Data begins with the transport header. 1554 trim += pkt.TransportHeader().View().Size() 1555 1556 if _, ok := pkt.Data().Consume(trim); !ok { 1557 stats.MalformedPacketsReceived.Increment() 1558 return fmt.Errorf("could not consume %d bytes", trim) 1559 } 1560 1561 proto := tcpip.TransportProtocolNumber(extHdr.Identifier) 1562 // If the packet was reassembled from a fragment, it will not have a 1563 // transport header set yet. 1564 if pkt.TransportHeader().View().IsEmpty() { 1565 e.protocol.parseTransport(pkt, proto) 1566 } 1567 1568 stats.PacketsDelivered.Increment() 1569 if proto == header.ICMPv6ProtocolNumber { 1570 e.handleICMP(pkt, hasFragmentHeader, routerAlert) 1571 } else { 1572 stats.PacketsDelivered.Increment() 1573 switch res := e.dispatcher.DeliverTransportPacket(proto, pkt); res { 1574 case stack.TransportPacketHandled: 1575 case stack.TransportPacketDestinationPortUnreachable: 1576 // As per RFC 4443 section 3.1: 1577 // A destination node SHOULD originate a Destination Unreachable 1578 // message with Code 4 in response to a packet for which the 1579 // transport protocol (e.g., UDP) has no listener, if that transport 1580 // protocol has no alternative means to inform the sender. 1581 _ = e.protocol.returnError(&icmpReasonPortUnreachable{}, pkt) 1582 return fmt.Errorf("destination port unreachable") 1583 case stack.TransportPacketProtocolUnreachable: 1584 // As per RFC 8200 section 4. (page 7): 1585 // Extension headers are numbered from IANA IP Protocol Numbers 1586 // [IANA-PN], the same values used for IPv4 and IPv6. When 1587 // processing a sequence of Next Header values in a packet, the 1588 // first one that is not an extension header [IANA-EH] indicates 1589 // that the next item in the packet is the corresponding upper-layer 1590 // header. 
1591 // With more related information on page 8: 1592 // If, as a result of processing a header, the destination node is 1593 // required to proceed to the next header but the Next Header value 1594 // in the current header is unrecognized by the node, it should 1595 // discard the packet and send an ICMP Parameter Problem message to 1596 // the source of the packet, with an ICMP Code value of 1 1597 // ("unrecognized Next Header type encountered") and the ICMP 1598 // Pointer field containing the offset of the unrecognized value 1599 // within the original packet. 1600 // 1601 // Which when taken together indicate that an unknown protocol should 1602 // be treated as an unrecognized next header value. 1603 // The location of the Next Header field is in a different place in 1604 // the initial IPv6 header than it is in the extension headers so 1605 // treat it specially. 1606 prevHdrIDOffset := uint32(header.IPv6NextHeaderOffset) 1607 if previousHeaderStart != 0 { 1608 prevHdrIDOffset = previousHeaderStart 1609 } 1610 _ = e.protocol.returnError(&icmpReasonParameterProblem{ 1611 code: header.ICMPv6UnknownHeader, 1612 pointer: prevHdrIDOffset, 1613 }, pkt) 1614 return fmt.Errorf("transport protocol unreachable") 1615 default: 1616 panic(fmt.Sprintf("unrecognized result from DeliverTransportPacket = %d", res)) 1617 } 1618 } 1619 1620 default: 1621 // Since the iterator returns IPv6RawPayloadHeader for unknown Extension 1622 // Header IDs this should never happen unless we missed a supported type 1623 // here. 1624 panic(fmt.Sprintf("unrecognized type from it.Next() = %T", extHdr)) 1625 1626 } 1627 } 1628 return nil 1629 } 1630 1631 // Close cleans up resources associated with the endpoint. 1632 func (e *endpoint) Close() { 1633 e.mu.Lock() 1634 e.disableLocked() 1635 e.mu.addressableEndpointState.Cleanup() 1636 e.mu.Unlock() 1637 1638 e.protocol.forgetEndpoint(e.nic.ID()) 1639 } 1640 1641 // NetworkProtocolNumber implements stack.NetworkEndpoint. 
1642 func (e *endpoint) NetworkProtocolNumber() tcpip.NetworkProtocolNumber { 1643 return e.protocol.Number() 1644 } 1645 1646 // AddAndAcquirePermanentAddress implements stack.AddressableEndpoint. 1647 func (e *endpoint) AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) { 1648 // TODO(b/169350103): add checks here after making sure we no longer receive 1649 // an empty address. 1650 e.mu.Lock() 1651 defer e.mu.Unlock() 1652 return e.addAndAcquirePermanentAddressLocked(addr, properties) 1653 } 1654 1655 // addAndAcquirePermanentAddressLocked is like AddAndAcquirePermanentAddress but 1656 // with locking requirements. 1657 // 1658 // addAndAcquirePermanentAddressLocked also joins the passed address's 1659 // solicited-node multicast group and start duplicate address detection. 1660 // 1661 // Precondition: e.mu must be write locked. 1662 func (e *endpoint) addAndAcquirePermanentAddressLocked(addr tcpip.AddressWithPrefix, properties stack.AddressProperties) (stack.AddressEndpoint, tcpip.Error) { 1663 addressEndpoint, err := e.mu.addressableEndpointState.AddAndAcquirePermanentAddress(addr, properties) 1664 if err != nil { 1665 return nil, err 1666 } 1667 1668 if !header.IsV6UnicastAddress(addr.Address) { 1669 return addressEndpoint, nil 1670 } 1671 1672 addressEndpoint.SetKind(stack.PermanentTentative) 1673 1674 if e.Enabled() { 1675 if err := e.mu.ndp.startDuplicateAddressDetection(addr.Address, addressEndpoint); err != nil { 1676 return nil, err 1677 } 1678 } 1679 1680 snmc := header.SolicitedNodeAddr(addr.Address) 1681 if err := e.joinGroupLocked(snmc); err != nil { 1682 // joinGroupLocked only returns an error if the group address is not a valid 1683 // IPv6 multicast address. 1684 panic(fmt.Sprintf("e.joinGroupLocked(%s): %s", snmc, err)) 1685 } 1686 1687 return addressEndpoint, nil 1688 } 1689 1690 // RemovePermanentAddress implements stack.AddressableEndpoint. 
1691 func (e *endpoint) RemovePermanentAddress(addr tcpip.Address) tcpip.Error { 1692 e.mu.Lock() 1693 defer e.mu.Unlock() 1694 1695 addressEndpoint := e.getAddressRLocked(addr) 1696 if addressEndpoint == nil || !addressEndpoint.GetKind().IsPermanent() { 1697 return &tcpip.ErrBadLocalAddress{} 1698 } 1699 1700 return e.removePermanentEndpointLocked(addressEndpoint, true /* allowSLAACInvalidation */, &stack.DADAborted{}) 1701 } 1702 1703 // removePermanentEndpointLocked is like removePermanentAddressLocked except 1704 // it works with a stack.AddressEndpoint. 1705 // 1706 // Precondition: e.mu must be write locked. 1707 func (e *endpoint) removePermanentEndpointLocked(addressEndpoint stack.AddressEndpoint, allowSLAACInvalidation bool, dadResult stack.DADResult) tcpip.Error { 1708 addr := addressEndpoint.AddressWithPrefix() 1709 // If we are removing an address generated via SLAAC, cleanup 1710 // its SLAAC resources and notify the integrator. 1711 switch addressEndpoint.ConfigType() { 1712 case stack.AddressConfigSlaac: 1713 e.mu.ndp.cleanupSLAACAddrResourcesAndNotify(addr, allowSLAACInvalidation) 1714 case stack.AddressConfigSlaacTemp: 1715 e.mu.ndp.cleanupTempSLAACAddrResourcesAndNotify(addr) 1716 } 1717 1718 return e.removePermanentEndpointInnerLocked(addressEndpoint, dadResult) 1719 } 1720 1721 // removePermanentEndpointInnerLocked is like removePermanentEndpointLocked 1722 // except it does not cleanup SLAAC address state. 1723 // 1724 // Precondition: e.mu must be write locked. 
1725 func (e *endpoint) removePermanentEndpointInnerLocked(addressEndpoint stack.AddressEndpoint, dadResult stack.DADResult) tcpip.Error { 1726 addr := addressEndpoint.AddressWithPrefix() 1727 e.mu.ndp.stopDuplicateAddressDetection(addr.Address, dadResult) 1728 1729 if err := e.mu.addressableEndpointState.RemovePermanentEndpoint(addressEndpoint); err != nil { 1730 return err 1731 } 1732 1733 snmc := header.SolicitedNodeAddr(addr.Address) 1734 err := e.leaveGroupLocked(snmc) 1735 // The endpoint may have already left the multicast group. 1736 if _, ok := err.(*tcpip.ErrBadLocalAddress); ok { 1737 err = nil 1738 } 1739 return err 1740 } 1741 1742 // hasPermanentAddressLocked returns true if the endpoint has a permanent 1743 // address equal to the passed address. 1744 // 1745 // Precondition: e.mu must be read or write locked. 1746 func (e *endpoint) hasPermanentAddressRLocked(addr tcpip.Address) bool { 1747 addressEndpoint := e.getAddressRLocked(addr) 1748 if addressEndpoint == nil { 1749 return false 1750 } 1751 return addressEndpoint.GetKind().IsPermanent() 1752 } 1753 1754 // getAddressRLocked returns the endpoint for the passed address. 1755 // 1756 // Precondition: e.mu must be read or write locked. 1757 func (e *endpoint) getAddressRLocked(localAddr tcpip.Address) stack.AddressEndpoint { 1758 return e.mu.addressableEndpointState.GetAddress(localAddr) 1759 } 1760 1761 // MainAddress implements stack.AddressableEndpoint. 1762 func (e *endpoint) MainAddress() tcpip.AddressWithPrefix { 1763 e.mu.RLock() 1764 defer e.mu.RUnlock() 1765 return e.mu.addressableEndpointState.MainAddress() 1766 } 1767 1768 // AcquireAssignedAddress implements stack.AddressableEndpoint. 
1769 func (e *endpoint) AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint { 1770 e.mu.RLock() 1771 defer e.mu.RUnlock() 1772 return e.acquireAddressOrCreateTempLocked(localAddr, allowTemp, tempPEB) 1773 } 1774 1775 // acquireAddressOrCreateTempLocked is like AcquireAssignedAddress but with 1776 // locking requirements. 1777 // 1778 // Precondition: e.mu must be write locked. 1779 func (e *endpoint) acquireAddressOrCreateTempLocked(localAddr tcpip.Address, allowTemp bool, tempPEB stack.PrimaryEndpointBehavior) stack.AddressEndpoint { 1780 return e.mu.addressableEndpointState.AcquireAssignedAddress(localAddr, allowTemp, tempPEB) 1781 } 1782 1783 // AcquireOutgoingPrimaryAddress implements stack.AddressableEndpoint. 1784 func (e *endpoint) AcquireOutgoingPrimaryAddress(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1785 e.mu.RLock() 1786 defer e.mu.RUnlock() 1787 return e.acquireOutgoingPrimaryAddressRLocked(remoteAddr, allowExpired) 1788 } 1789 1790 // getLinkLocalAddressRLocked returns a link-local address from the primary list 1791 // of addresses, if one is available. 1792 // 1793 // See stack.PrimaryEndpointBehavior for more details about the primary list. 1794 // 1795 // Precondition: e.mu must be read locked. 1796 func (e *endpoint) getLinkLocalAddressRLocked() tcpip.Address { 1797 var linkLocalAddr tcpip.Address 1798 e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { 1799 if addressEndpoint.IsAssigned(false /* allowExpired */) { 1800 if addr := addressEndpoint.AddressWithPrefix().Address; header.IsV6LinkLocalUnicastAddress(addr) { 1801 linkLocalAddr = addr 1802 return false 1803 } 1804 } 1805 return true 1806 }) 1807 return linkLocalAddr 1808 } 1809 1810 // acquireOutgoingPrimaryAddressRLocked is like AcquireOutgoingPrimaryAddress 1811 // but with locking requirements. 
1812 // 1813 // Precondition: e.mu must be read locked. 1814 func (e *endpoint) acquireOutgoingPrimaryAddressRLocked(remoteAddr tcpip.Address, allowExpired bool) stack.AddressEndpoint { 1815 // addrCandidate is a candidate for Source Address Selection, as per 1816 // RFC 6724 section 5. 1817 type addrCandidate struct { 1818 addressEndpoint stack.AddressEndpoint 1819 addr tcpip.Address 1820 scope header.IPv6AddressScope 1821 1822 label uint8 1823 matchingPrefix uint8 1824 } 1825 1826 if len(remoteAddr) == 0 { 1827 return e.mu.addressableEndpointState.AcquireOutgoingPrimaryAddress(remoteAddr, allowExpired) 1828 } 1829 1830 // Create a candidate set of available addresses we can potentially use as a 1831 // source address. 1832 var cs []addrCandidate 1833 e.mu.addressableEndpointState.ForEachPrimaryEndpoint(func(addressEndpoint stack.AddressEndpoint) bool { 1834 // If r is not valid for outgoing connections, it is not a valid endpoint. 1835 if !addressEndpoint.IsAssigned(allowExpired) { 1836 return true 1837 } 1838 1839 addr := addressEndpoint.AddressWithPrefix().Address 1840 scope, err := header.ScopeForIPv6Address(addr) 1841 if err != nil { 1842 // Should never happen as we got r from the primary IPv6 endpoint list and 1843 // ScopeForIPv6Address only returns an error if addr is not an IPv6 1844 // address. 1845 panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", addr, err)) 1846 } 1847 1848 cs = append(cs, addrCandidate{ 1849 addressEndpoint: addressEndpoint, 1850 addr: addr, 1851 scope: scope, 1852 label: getLabel(addr), 1853 matchingPrefix: remoteAddr.MatchingPrefix(addr), 1854 }) 1855 1856 return true 1857 }) 1858 1859 remoteScope, err := header.ScopeForIPv6Address(remoteAddr) 1860 if err != nil { 1861 // primaryIPv6Endpoint should never be called with an invalid IPv6 address. 
1862 panic(fmt.Sprintf("header.ScopeForIPv6Address(%s): %s", remoteAddr, err)) 1863 } 1864 1865 remoteLabel := getLabel(remoteAddr) 1866 1867 // Sort the addresses as per RFC 6724 section 5 rules 1-3. 1868 // 1869 // TODO(b/146021396): Implement rules 4, 5 of RFC 6724 section 5. 1870 sort.Slice(cs, func(i, j int) bool { 1871 sa := cs[i] 1872 sb := cs[j] 1873 1874 // Prefer same address as per RFC 6724 section 5 rule 1. 1875 if sa.addr == remoteAddr { 1876 return true 1877 } 1878 if sb.addr == remoteAddr { 1879 return false 1880 } 1881 1882 // Prefer appropriate scope as per RFC 6724 section 5 rule 2. 1883 if sa.scope < sb.scope { 1884 return sa.scope >= remoteScope 1885 } else if sb.scope < sa.scope { 1886 return sb.scope < remoteScope 1887 } 1888 1889 // Avoid deprecated addresses as per RFC 6724 section 5 rule 3. 1890 if saDep, sbDep := sa.addressEndpoint.Deprecated(), sb.addressEndpoint.Deprecated(); saDep != sbDep { 1891 // If sa is not deprecated, it is preferred over sb. 1892 return sbDep 1893 } 1894 1895 // Prefer matching label as per RFC 6724 section 5 rule 6. 1896 if sa, sb := sa.label == remoteLabel, sb.label == remoteLabel; sa != sb { 1897 if sa { 1898 return true 1899 } 1900 if sb { 1901 return false 1902 } 1903 } 1904 1905 // Prefer temporary addresses as per RFC 6724 section 5 rule 7. 1906 if saTemp, sbTemp := sa.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp, sb.addressEndpoint.ConfigType() == stack.AddressConfigSlaacTemp; saTemp != sbTemp { 1907 return saTemp 1908 } 1909 1910 // Use longest matching prefix as per RFC 6724 section 5 rule 8. 1911 if sa.matchingPrefix > sb.matchingPrefix { 1912 return true 1913 } 1914 if sb.matchingPrefix > sa.matchingPrefix { 1915 return false 1916 } 1917 1918 // sa and sb are equal, return the endpoint that is closest to the front of 1919 // the primary endpoint list. 1920 return i < j 1921 }) 1922 1923 // Return the most preferred address that can have its reference count 1924 // incremented. 
1925 for _, c := range cs { 1926 if c.addressEndpoint.IncRef() { 1927 return c.addressEndpoint 1928 } 1929 } 1930 1931 return nil 1932 } 1933 1934 // PrimaryAddresses implements stack.AddressableEndpoint. 1935 func (e *endpoint) PrimaryAddresses() []tcpip.AddressWithPrefix { 1936 e.mu.RLock() 1937 defer e.mu.RUnlock() 1938 return e.mu.addressableEndpointState.PrimaryAddresses() 1939 } 1940 1941 // PermanentAddresses implements stack.AddressableEndpoint. 1942 func (e *endpoint) PermanentAddresses() []tcpip.AddressWithPrefix { 1943 e.mu.RLock() 1944 defer e.mu.RUnlock() 1945 return e.mu.addressableEndpointState.PermanentAddresses() 1946 } 1947 1948 // JoinGroup implements stack.GroupAddressableEndpoint. 1949 func (e *endpoint) JoinGroup(addr tcpip.Address) tcpip.Error { 1950 e.mu.Lock() 1951 defer e.mu.Unlock() 1952 return e.joinGroupLocked(addr) 1953 } 1954 1955 // joinGroupLocked is like JoinGroup but with locking requirements. 1956 // 1957 // Precondition: e.mu must be locked. 1958 func (e *endpoint) joinGroupLocked(addr tcpip.Address) tcpip.Error { 1959 if !header.IsV6MulticastAddress(addr) { 1960 return &tcpip.ErrBadAddress{} 1961 } 1962 1963 e.mu.mld.joinGroup(addr) 1964 return nil 1965 } 1966 1967 // LeaveGroup implements stack.GroupAddressableEndpoint. 1968 func (e *endpoint) LeaveGroup(addr tcpip.Address) tcpip.Error { 1969 e.mu.Lock() 1970 defer e.mu.Unlock() 1971 return e.leaveGroupLocked(addr) 1972 } 1973 1974 // leaveGroupLocked is like LeaveGroup but with locking requirements. 1975 // 1976 // Precondition: e.mu must be locked. 1977 func (e *endpoint) leaveGroupLocked(addr tcpip.Address) tcpip.Error { 1978 return e.mu.mld.leaveGroup(addr) 1979 } 1980 1981 // IsInGroup implements stack.GroupAddressableEndpoint. 1982 func (e *endpoint) IsInGroup(addr tcpip.Address) bool { 1983 e.mu.RLock() 1984 defer e.mu.RUnlock() 1985 return e.mu.mld.isInGroup(addr) 1986 } 1987 1988 // Stats implements stack.NetworkEndpoint. 
1989 func (e *endpoint) Stats() stack.NetworkEndpointStats { 1990 return &e.stats.localStats 1991 } 1992 1993 var _ stack.NetworkProtocol = (*protocol)(nil) 1994 var _ fragmentation.TimeoutHandler = (*protocol)(nil) 1995 1996 type protocol struct { 1997 stack *stack.Stack 1998 options Options 1999 2000 mu struct { 2001 sync.RWMutex 2002 2003 // eps is keyed by NICID to allow protocol methods to retrieve an endpoint 2004 // when handling a packet, by looking at which NIC handled the packet. 2005 eps map[tcpip.NICID]*endpoint 2006 2007 // ICMP types for which the stack's global rate limiting must apply. 2008 icmpRateLimitedTypes map[header.ICMPv6Type]struct{} 2009 } 2010 2011 ids []uint32 2012 hashIV uint32 2013 2014 // defaultTTL is the current default TTL for the protocol. Only the 2015 // uint8 portion of it is meaningful. 2016 // 2017 // Must be accessed using atomic operations. 2018 defaultTTL uint32 2019 2020 fragmentation *fragmentation.Fragmentation 2021 icmpRateLimiter *stack.ICMPRateLimiter 2022 } 2023 2024 // Number returns the ipv6 protocol number. 2025 func (p *protocol) Number() tcpip.NetworkProtocolNumber { 2026 return ProtocolNumber 2027 } 2028 2029 // MinimumPacketSize returns the minimum valid ipv6 packet size. 2030 func (p *protocol) MinimumPacketSize() int { 2031 return header.IPv6MinimumSize 2032 } 2033 2034 // ParseAddresses implements stack.NetworkProtocol. 2035 func (*protocol) ParseAddresses(v buffer.View) (src, dst tcpip.Address) { 2036 h := header.IPv6(v) 2037 return h.SourceAddress(), h.DestinationAddress() 2038 } 2039 2040 // NewEndpoint creates a new ipv6 endpoint. 
2041 func (p *protocol) NewEndpoint(nic stack.NetworkInterface, dispatcher stack.TransportDispatcher) stack.NetworkEndpoint { 2042 e := &endpoint{ 2043 nic: nic, 2044 dispatcher: dispatcher, 2045 protocol: p, 2046 } 2047 2048 // NDP options must be 8 octet aligned and the first 2 bytes are used for 2049 // the type and length fields leaving 6 octets as the minimum size for a 2050 // nonce option without padding. 2051 const nonceSize = 6 2052 2053 // As per RFC 7527 section 4.1, 2054 // 2055 // If any probe is looped back within RetransTimer milliseconds after 2056 // having sent DupAddrDetectTransmits NS(DAD) messages, the interface 2057 // continues with another MAX_MULTICAST_SOLICIT number of NS(DAD) 2058 // messages transmitted RetransTimer milliseconds apart. 2059 // 2060 // Value taken from RFC 4861 section 10. 2061 const maxMulticastSolicit = 3 2062 dadOptions := ip.DADOptions{ 2063 Clock: p.stack.Clock(), 2064 SecureRNG: p.stack.SecureRNG(), 2065 NonceSize: nonceSize, 2066 ExtendDADTransmits: maxMulticastSolicit, 2067 Protocol: &e.mu.ndp, 2068 NICID: nic.ID(), 2069 } 2070 2071 e.mu.Lock() 2072 e.mu.addressableEndpointState.Init(e) 2073 e.mu.ndp.init(e, dadOptions) 2074 e.mu.mld.init(e) 2075 e.dad.mu.Lock() 2076 e.dad.mu.dad.Init(&e.dad.mu, p.options.DADConfigs, dadOptions) 2077 e.dad.mu.Unlock() 2078 e.mu.Unlock() 2079 2080 stackStats := p.stack.Stats() 2081 tcpip.InitStatCounters(reflect.ValueOf(&e.stats.localStats).Elem()) 2082 e.stats.ip.Init(&e.stats.localStats.IP, &stackStats.IP) 2083 e.stats.icmp.init(&e.stats.localStats.ICMP, &stackStats.ICMP.V6) 2084 2085 p.mu.Lock() 2086 defer p.mu.Unlock() 2087 p.mu.eps[nic.ID()] = e 2088 return e 2089 } 2090 2091 func (p *protocol) findEndpointWithAddress(addr tcpip.Address) *endpoint { 2092 p.mu.RLock() 2093 defer p.mu.RUnlock() 2094 2095 for _, e := range p.mu.eps { 2096 if addressEndpoint := e.AcquireAssignedAddress(addr, false /* allowTemp */, stack.NeverPrimaryEndpoint); addressEndpoint != nil { 2097 
addressEndpoint.DecRef() 2098 return e 2099 } 2100 } 2101 2102 return nil 2103 } 2104 2105 func (p *protocol) getEndpointForNIC(id tcpip.NICID) (*endpoint, bool) { 2106 p.mu.RLock() 2107 defer p.mu.RUnlock() 2108 ep, ok := p.mu.eps[id] 2109 return ep, ok 2110 } 2111 2112 func (p *protocol) forgetEndpoint(nicID tcpip.NICID) { 2113 p.mu.Lock() 2114 defer p.mu.Unlock() 2115 delete(p.mu.eps, nicID) 2116 } 2117 2118 // SetOption implements stack.NetworkProtocol. 2119 func (p *protocol) SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error { 2120 switch v := option.(type) { 2121 case *tcpip.DefaultTTLOption: 2122 p.SetDefaultTTL(uint8(*v)) 2123 return nil 2124 default: 2125 return &tcpip.ErrUnknownProtocolOption{} 2126 } 2127 } 2128 2129 // Option implements stack.NetworkProtocol. 2130 func (p *protocol) Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error { 2131 switch v := option.(type) { 2132 case *tcpip.DefaultTTLOption: 2133 *v = tcpip.DefaultTTLOption(p.DefaultTTL()) 2134 return nil 2135 default: 2136 return &tcpip.ErrUnknownProtocolOption{} 2137 } 2138 } 2139 2140 // SetDefaultTTL sets the default TTL for endpoints created with this protocol. 2141 func (p *protocol) SetDefaultTTL(ttl uint8) { 2142 atomic.StoreUint32(&p.defaultTTL, uint32(ttl)) 2143 } 2144 2145 // DefaultTTL returns the default TTL for endpoints created with this protocol. 2146 func (p *protocol) DefaultTTL() uint8 { 2147 return uint8(atomic.LoadUint32(&p.defaultTTL)) 2148 } 2149 2150 // Close implements stack.TransportProtocol. 2151 func (*protocol) Close() {} 2152 2153 // Wait implements stack.TransportProtocol. 2154 func (*protocol) Wait() {} 2155 2156 // parseAndValidate parses the packet (including its transport layer header) and 2157 // returns the parsed IP header. 2158 // 2159 // Returns true if the IP header was successfully parsed. 
2160 func (p *protocol) parseAndValidate(pkt *stack.PacketBuffer) (header.IPv6, bool) { 2161 transProtoNum, hasTransportHdr, ok := p.Parse(pkt) 2162 if !ok { 2163 return nil, false 2164 } 2165 2166 h := header.IPv6(pkt.NetworkHeader().View()) 2167 // Do not include the link header's size when calculating the size of the IP 2168 // packet. 2169 if !h.IsValid(pkt.Size() - pkt.LinkHeader().View().Size()) { 2170 return nil, false 2171 } 2172 2173 if hasTransportHdr { 2174 p.parseTransport(pkt, transProtoNum) 2175 } 2176 2177 return h, true 2178 } 2179 2180 func (p *protocol) parseTransport(pkt *stack.PacketBuffer, transProtoNum tcpip.TransportProtocolNumber) { 2181 if transProtoNum == header.ICMPv6ProtocolNumber { 2182 // The transport layer will handle transport layer parsing errors. 2183 _ = parse.ICMPv6(pkt) 2184 return 2185 } 2186 2187 switch err := p.stack.ParsePacketBufferTransport(transProtoNum, pkt); err { 2188 case stack.ParsedOK: 2189 case stack.UnknownTransportProtocol, stack.TransportLayerParseError: 2190 // The transport layer will handle unknown protocols and transport layer 2191 // parsing errors. 2192 default: 2193 panic(fmt.Sprintf("unexpected error parsing transport header = %d", err)) 2194 } 2195 } 2196 2197 // Parse implements stack.NetworkProtocol. 2198 func (*protocol) Parse(pkt *stack.PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool) { 2199 proto, _, fragOffset, fragMore, ok := parse.IPv6(pkt) 2200 if !ok { 2201 return 0, false, false 2202 } 2203 2204 return proto, !fragMore && fragOffset == 0, true 2205 } 2206 2207 // allowICMPReply reports whether an ICMP reply with provided type may 2208 // be sent following the rate mask options and global ICMP rate limiter. 
2209 func (p *protocol) allowICMPReply(icmpType header.ICMPv6Type) bool { 2210 p.mu.RLock() 2211 defer p.mu.RUnlock() 2212 2213 if _, ok := p.mu.icmpRateLimitedTypes[icmpType]; ok { 2214 return p.stack.AllowICMPMessage() 2215 } 2216 return true 2217 } 2218 2219 // calculateNetworkMTU calculates the network-layer payload MTU based on the 2220 // link-layer payload MTU and the length of every IPv6 header. 2221 // Note that this is different than the Payload Length field of the IPv6 header, 2222 // which includes the length of the extension headers. 2223 func calculateNetworkMTU(linkMTU, networkHeadersLen uint32) (uint32, tcpip.Error) { 2224 if linkMTU < header.IPv6MinimumMTU { 2225 return 0, &tcpip.ErrInvalidEndpointState{} 2226 } 2227 2228 // As per RFC 7112 section 5, we should discard packets if their IPv6 header 2229 // is bigger than 1280 bytes (ie, the minimum link MTU) since we do not 2230 // support PMTU discovery: 2231 // Hosts that do not discover the Path MTU MUST limit the IPv6 Header Chain 2232 // length to 1280 bytes. Limiting the IPv6 Header Chain length to 1280 2233 // bytes ensures that the header chain length does not exceed the IPv6 2234 // minimum MTU. 2235 if networkHeadersLen > header.IPv6MinimumMTU { 2236 return 0, &tcpip.ErrMalformedHeader{} 2237 } 2238 2239 networkMTU := linkMTU - networkHeadersLen 2240 if networkMTU > maxPayloadSize { 2241 networkMTU = maxPayloadSize 2242 } 2243 return networkMTU, nil 2244 } 2245 2246 // Options holds options to configure a new protocol. 2247 type Options struct { 2248 // NDPConfigs is the default NDP configurations used by interfaces. 2249 NDPConfigs NDPConfigurations 2250 2251 // AutoGenLinkLocal determines whether or not the stack attempts to 2252 // auto-generate a link-local address for newly enabled non-loopback 2253 // NICs. 2254 // 2255 // Note, setting this to true does not mean that a link-local address is 2256 // assigned right away, or at all. 
If Duplicate Address Detection is enabled, 2257 // an address is only assigned if it successfully resolves. If it fails, no 2258 // further attempts are made to auto-generate a link-local address. 2259 // 2260 // The generated link-local address follows RFC 4291 Appendix A guidelines. 2261 AutoGenLinkLocal bool 2262 2263 // NDPDisp is the NDP event dispatcher that an integrator can provide to 2264 // receive NDP related events. 2265 NDPDisp NDPDispatcher 2266 2267 // OpaqueIIDOpts hold the options for generating opaque interface 2268 // identifiers (IIDs) as outlined by RFC 7217. 2269 OpaqueIIDOpts OpaqueInterfaceIdentifierOptions 2270 2271 // TempIIDSeed is used to seed the initial temporary interface identifier 2272 // history value used to generate IIDs for temporary SLAAC addresses. 2273 // 2274 // Temporary SLAAC addresses are short-lived addresses which are unpredictable 2275 // and random from the perspective of other nodes on the network. It is 2276 // recommended that the seed be a random byte buffer of at least 2277 // header.IIDSize bytes to make sure that temporary SLAAC addresses are 2278 // sufficiently random. It should follow minimum randomness requirements for 2279 // security as outlined by RFC 4086. 2280 // 2281 // Note: using a nil value, the same seed across netstack program runs, or a 2282 // seed that is too small would reduce randomness and increase predictability, 2283 // defeating the purpose of temporary SLAAC addresses. 2284 TempIIDSeed []byte 2285 2286 // MLD holds options for MLD. 2287 MLD MLDOptions 2288 2289 // DADConfigs holds the default DAD configurations used by IPv6 endpoints. 2290 DADConfigs stack.DADConfigurations 2291 2292 // AllowExternalLoopbackTraffic indicates that inbound loopback packets (i.e. 2293 // martian loopback packets) should be accepted. 2294 AllowExternalLoopbackTraffic bool 2295 } 2296 2297 // NewProtocolWithOptions returns an IPv6 network protocol. 
2298 func NewProtocolWithOptions(opts Options) stack.NetworkProtocolFactory { 2299 opts.NDPConfigs.validate() 2300 2301 ids := hash.RandN32(buckets) 2302 hashIV := hash.RandN32(1)[0] 2303 2304 return func(s *stack.Stack) stack.NetworkProtocol { 2305 p := &protocol{ 2306 stack: s, 2307 options: opts, 2308 2309 ids: ids, 2310 hashIV: hashIV, 2311 } 2312 p.fragmentation = fragmentation.NewFragmentation(header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit, fragmentation.HighFragThreshold, fragmentation.LowFragThreshold, ReassembleTimeout, s.Clock(), p) 2313 p.mu.eps = make(map[tcpip.NICID]*endpoint) 2314 p.SetDefaultTTL(DefaultTTL) 2315 // Set default ICMP rate limiting to Linux defaults. 2316 // 2317 // Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big) 2318 // See https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt. 2319 defaultIcmpTypes := make(map[header.ICMPv6Type]struct{}) 2320 for i := header.ICMPv6Type(0); i < header.ICMPv6EchoRequest; i++ { 2321 switch i { 2322 case header.ICMPv6PacketTooBig: 2323 // Do not rate limit packet too big by default. 2324 default: 2325 defaultIcmpTypes[i] = struct{}{} 2326 } 2327 } 2328 p.mu.icmpRateLimitedTypes = defaultIcmpTypes 2329 2330 return p 2331 } 2332 } 2333 2334 // NewProtocol is equivalent to NewProtocolWithOptions with an empty Options. 2335 func NewProtocol(s *stack.Stack) stack.NetworkProtocol { 2336 return NewProtocolWithOptions(Options{})(s) 2337 } 2338 2339 func calculateFragmentReserve(pkt *stack.PacketBuffer) int { 2340 return pkt.AvailableHeaderBytes() + pkt.NetworkHeader().View().Size() + header.IPv6FragmentHeaderSize 2341 } 2342 2343 // hashRoute calculates a hash value for the given route. It uses the source & 2344 // destination address and 32-bit number to generate the hash. 2345 func hashRoute(r *stack.Route, hashIV uint32) uint32 { 2346 // The FNV-1a was chosen because it is a fast hashing algorithm, and 2347 // cryptographic properties are not needed here. 
2348 h := fnv.New32a() 2349 if _, err := h.Write([]byte(r.LocalAddress())); err != nil { 2350 panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err)) 2351 } 2352 2353 if _, err := h.Write([]byte(r.RemoteAddress())); err != nil { 2354 panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected to ever return an error", err)) 2355 } 2356 2357 s := make([]byte, 4) 2358 binary.LittleEndian.PutUint32(s, hashIV) 2359 if _, err := h.Write(s); err != nil { 2360 panic(fmt.Sprintf("Hash.Write: %s, but Hash' implementation of Write is not expected ever to return an error", err)) 2361 } 2362 2363 return h.Sum32() 2364 } 2365 2366 func buildNextFragment(pf *fragmentation.PacketFragmenter, originalIPHeaders header.IPv6, transportProto tcpip.TransportProtocolNumber, id uint32) (*stack.PacketBuffer, bool) { 2367 fragPkt, offset, copied, more := pf.BuildNextFragment() 2368 fragPkt.NetworkProtocolNumber = ProtocolNumber 2369 2370 originalIPHeadersLength := len(originalIPHeaders) 2371 2372 s := header.IPv6ExtHdrSerializer{&header.IPv6SerializableFragmentExtHdr{ 2373 FragmentOffset: uint16(offset / header.IPv6FragmentExtHdrFragmentOffsetBytesPerUnit), 2374 M: more, 2375 Identification: id, 2376 }} 2377 2378 fragmentIPHeadersLength := originalIPHeadersLength + s.Length() 2379 fragmentIPHeaders := header.IPv6(fragPkt.NetworkHeader().Push(fragmentIPHeadersLength)) 2380 2381 // Copy the IPv6 header and any extension headers already populated. 
2382 if copied := copy(fragmentIPHeaders, originalIPHeaders); copied != originalIPHeadersLength { 2383 panic(fmt.Sprintf("wrong number of bytes copied into fragmentIPHeaders: got %d, want %d", copied, originalIPHeadersLength)) 2384 } 2385 2386 nextHeader, _ := s.Serialize(transportProto, fragmentIPHeaders[originalIPHeadersLength:]) 2387 2388 fragmentIPHeaders.SetNextHeader(nextHeader) 2389 fragmentIPHeaders.SetPayloadLength(uint16(copied + fragmentIPHeadersLength - header.IPv6MinimumSize)) 2390 2391 return fragPkt, more 2392 }