github.com/polevpn/netstack@v1.10.9/tcpip/stack/nic.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stack 16 17 import ( 18 "strings" 19 "sync" 20 "sync/atomic" 21 22 "github.com/polevpn/netstack/tcpip" 23 "github.com/polevpn/netstack/tcpip/buffer" 24 "github.com/polevpn/netstack/tcpip/header" 25 ) 26 27 // NIC represents a "network interface card" to which the networking stack is 28 // attached. 29 type NIC struct { 30 stack *Stack 31 id tcpip.NICID 32 name string 33 linkEP LinkEndpoint 34 loopback bool 35 36 mu sync.RWMutex 37 spoofing bool 38 promiscuous bool 39 primary map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint 40 endpoints map[NetworkEndpointID]*referencedNetworkEndpoint 41 addressRanges []tcpip.Subnet 42 mcastJoins map[NetworkEndpointID]int32 43 // packetEPs is protected by mu, but the contained PacketEndpoint 44 // values are not. 45 packetEPs map[tcpip.NetworkProtocolNumber][]PacketEndpoint 46 47 stats NICStats 48 49 // ndp is the NDP related state for NIC. 50 // 51 // Note, read and write operations on ndp require that the NIC is 52 // appropriately locked. 53 ndp ndpState 54 } 55 56 // NICStats includes transmitted and received stats. 57 type NICStats struct { 58 Tx DirectionStats 59 Rx DirectionStats 60 } 61 62 // DirectionStats includes packet and byte counts. 63 type DirectionStats struct { 64 Packets *tcpip.StatCounter 65 Bytes *tcpip.StatCounter 66 } 67 68 // PrimaryEndpointBehavior is an enumeration of an endpoint's primacy behavior. 69 type PrimaryEndpointBehavior int 70 71 const ( 72 // CanBePrimaryEndpoint indicates the endpoint can be used as a primary 73 // endpoint for new connections with no local address. This is the 74 // default when calling NIC.AddAddress. 75 CanBePrimaryEndpoint PrimaryEndpointBehavior = iota 76 77 // FirstPrimaryEndpoint indicates the endpoint should be the first 78 // primary endpoint considered. If there are multiple endpoints with 79 // this behavior, the most recently-added one will be first. 80 FirstPrimaryEndpoint 81 82 // NeverPrimaryEndpoint indicates the endpoint should never be a 83 // primary endpoint. 84 NeverPrimaryEndpoint 85 ) 86 87 // newNIC returns a new NIC using the default NDP configurations from stack. 88 func newNIC(stack *Stack, id tcpip.NICID, name string, ep LinkEndpoint, loopback bool) *NIC { 89 // TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For 90 // example, make sure that the link address it provides is a valid 91 // unicast ethernet address. 92 93 // TODO(b/143357959): RFC 8200 section 5 requires that IPv6 endpoints 94 // observe an MTU of at least 1280 bytes. Ensure that this requirement 95 // of IPv6 is supported on this endpoint's LinkEndpoint. 96 97 nic := &NIC{ 98 stack: stack, 99 id: id, 100 name: name, 101 linkEP: ep, 102 loopback: loopback, 103 primary: make(map[tcpip.NetworkProtocolNumber][]*referencedNetworkEndpoint), 104 endpoints: make(map[NetworkEndpointID]*referencedNetworkEndpoint), 105 mcastJoins: make(map[NetworkEndpointID]int32), 106 packetEPs: make(map[tcpip.NetworkProtocolNumber][]PacketEndpoint), 107 stats: NICStats{ 108 Tx: DirectionStats{ 109 Packets: &tcpip.StatCounter{}, 110 Bytes: &tcpip.StatCounter{}, 111 }, 112 Rx: DirectionStats{ 113 Packets: &tcpip.StatCounter{}, 114 Bytes: &tcpip.StatCounter{}, 115 }, 116 }, 117 ndp: ndpState{ 118 configs: stack.ndpConfigs, 119 dad: make(map[tcpip.Address]dadState), 120 defaultRouters: make(map[tcpip.Address]defaultRouterState), 121 onLinkPrefixes: make(map[tcpip.Subnet]onLinkPrefixState), 122 }, 123 } 124 nic.ndp.nic = nic 125 126 // Register supported packet endpoint protocols. 127 for _, netProto := range header.Ethertypes { 128 nic.packetEPs[netProto] = []PacketEndpoint{} 129 } 130 for _, netProto := range stack.networkProtocols { 131 nic.packetEPs[netProto.Number()] = []PacketEndpoint{} 132 } 133 134 return nic 135 } 136 137 // enable enables the NIC. enable will attach the link to its LinkEndpoint and 138 // join the IPv6 All-Nodes Multicast address (ff02::1). 139 func (n *NIC) enable() *tcpip.Error { 140 n.attachLinkEndpoint() 141 142 // Create an endpoint to receive broadcast packets on this interface. 143 if _, ok := n.stack.networkProtocols[header.IPv4ProtocolNumber]; ok { 144 if err := n.AddAddress(tcpip.ProtocolAddress{ 145 Protocol: header.IPv4ProtocolNumber, 146 AddressWithPrefix: tcpip.AddressWithPrefix{header.IPv4Broadcast, 8 * header.IPv4AddressSize}, 147 }, NeverPrimaryEndpoint); err != nil { 148 return err 149 } 150 } 151 152 // Join the IPv6 All-Nodes Multicast group if the stack is configured to 153 // use IPv6. This is required to ensure that this node properly receives 154 // and responds to the various NDP messages that are destined to the 155 // all-nodes multicast address. An example is the Neighbor Advertisement 156 // when we perform Duplicate Address Detection, or Router Advertisement 157 // when we do Router Discovery. See RFC 4862, section 5.4.2 and RFC 4861 158 // section 4.2 for more information. 159 // 160 // Also auto-generate an IPv6 link-local address based on the NIC's 161 // link address if it is configured to do so. Note, each interface is 162 // required to have IPv6 link-local unicast address, as per RFC 4291 163 // section 2.1. 164 _, ok := n.stack.networkProtocols[header.IPv6ProtocolNumber] 165 if !ok { 166 return nil 167 } 168 169 n.mu.Lock() 170 defer n.mu.Unlock() 171 172 if err := n.joinGroupLocked(header.IPv6ProtocolNumber, header.IPv6AllNodesMulticastAddress); err != nil { 173 return err 174 } 175 176 if !n.stack.autoGenIPv6LinkLocal { 177 return nil 178 } 179 180 l2addr := n.linkEP.LinkAddress() 181 182 // Only attempt to generate the link-local address if we have a 183 // valid MAC address. 184 // 185 // TODO(b/141011931): Validate a LinkEndpoint's link address 186 // (provided by LinkEndpoint.LinkAddress) before reaching this 187 // point. 188 if !header.IsValidUnicastEthernetAddress(l2addr) { 189 return nil 190 } 191 192 addr := header.LinkLocalAddr(l2addr) 193 194 _, err := n.addPermanentAddressLocked(tcpip.ProtocolAddress{ 195 Protocol: header.IPv6ProtocolNumber, 196 AddressWithPrefix: tcpip.AddressWithPrefix{ 197 Address: addr, 198 PrefixLen: header.IPv6LinkLocalPrefix.PrefixLen, 199 }, 200 }, CanBePrimaryEndpoint) 201 202 return err 203 } 204 205 // attachLinkEndpoint attaches the NIC to the endpoint, which will enable it 206 // to start delivering packets. 207 func (n *NIC) attachLinkEndpoint() { 208 n.linkEP.Attach(n) 209 } 210 211 // setPromiscuousMode enables or disables promiscuous mode. 212 func (n *NIC) setPromiscuousMode(enable bool) { 213 n.mu.Lock() 214 n.promiscuous = enable 215 n.mu.Unlock() 216 } 217 218 func (n *NIC) isPromiscuousMode() bool { 219 n.mu.RLock() 220 rv := n.promiscuous 221 n.mu.RUnlock() 222 return rv 223 } 224 225 // setSpoofing enables or disables address spoofing. 226 func (n *NIC) setSpoofing(enable bool) { 227 n.mu.Lock() 228 n.spoofing = enable 229 n.mu.Unlock() 230 } 231 232 // primaryEndpoint returns the primary endpoint of n for the given network 233 // protocol. 234 func (n *NIC) primaryEndpoint(protocol tcpip.NetworkProtocolNumber) *referencedNetworkEndpoint { 235 n.mu.RLock() 236 defer n.mu.RUnlock() 237 238 for _, r := range n.primary[protocol] { 239 if r.isValidForOutgoing() && r.tryIncRef() { 240 return r 241 } 242 } 243 244 return nil 245 } 246 247 func (n *NIC) getRef(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) *referencedNetworkEndpoint { 248 return n.getRefOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, n.promiscuous) 249 } 250 251 // findEndpoint finds the endpoint, if any, with the given address. 252 func (n *NIC) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) *referencedNetworkEndpoint { 253 return n.getRefOrCreateTemp(protocol, address, peb, n.spoofing) 254 } 255 256 // getRefEpOrCreateTemp returns the referenced network endpoint for the given 257 // protocol and address. If none exists a temporary one may be created if 258 // we are in promiscuous mode or spoofing. 259 func (n *NIC) getRefOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, spoofingOrPromiscuous bool) *referencedNetworkEndpoint { 260 id := NetworkEndpointID{address} 261 262 n.mu.RLock() 263 264 if ref, ok := n.endpoints[id]; ok { 265 // An endpoint with this id exists, check if it can be used and return it. 266 switch ref.getKind() { 267 case permanentExpired: 268 if !spoofingOrPromiscuous { 269 n.mu.RUnlock() 270 return nil 271 } 272 fallthrough 273 case temporary, permanent: 274 if ref.tryIncRef() { 275 n.mu.RUnlock() 276 return ref 277 } 278 } 279 } 280 281 // A usable reference was not found, create a temporary one if requested by 282 // the caller or if the address is found in the NIC's subnets. 283 createTempEP := spoofingOrPromiscuous 284 if !createTempEP { 285 for _, sn := range n.addressRanges { 286 // Skip the subnet address. 287 if address == sn.ID() { 288 continue 289 } 290 // For now just skip the broadcast address, until we support it. 291 // FIXME(b/137608825): Add support for sending/receiving directed 292 // (subnet) broadcast. 293 if address == sn.Broadcast() { 294 continue 295 } 296 if sn.Contains(address) { 297 createTempEP = true 298 break 299 } 300 } 301 } 302 303 n.mu.RUnlock() 304 305 if !createTempEP { 306 return nil 307 } 308 309 // Try again with the lock in exclusive mode. If we still can't get the 310 // endpoint, create a new "temporary" endpoint. It will only exist while 311 // there's a route through it. 312 n.mu.Lock() 313 if ref, ok := n.endpoints[id]; ok { 314 // No need to check the type as we are ok with expired endpoints at this 315 // point. 316 if ref.tryIncRef() { 317 n.mu.Unlock() 318 return ref 319 } 320 // tryIncRef failing means the endpoint is scheduled to be removed once the 321 // lock is released. Remove it here so we can create a new (temporary) one. 322 // The removal logic waiting for the lock handles this case. 323 n.removeEndpointLocked(ref) 324 } 325 326 // Add a new temporary endpoint. 327 netProto, ok := n.stack.networkProtocols[protocol] 328 if !ok { 329 n.mu.Unlock() 330 return nil 331 } 332 ref, _ := n.addAddressLocked(tcpip.ProtocolAddress{ 333 Protocol: protocol, 334 AddressWithPrefix: tcpip.AddressWithPrefix{ 335 Address: address, 336 PrefixLen: netProto.DefaultPrefixLen(), 337 }, 338 }, peb, temporary) 339 340 n.mu.Unlock() 341 return ref 342 } 343 344 func (n *NIC) addPermanentAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) (*referencedNetworkEndpoint, *tcpip.Error) { 345 id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address} 346 if ref, ok := n.endpoints[id]; ok { 347 switch ref.getKind() { 348 case permanentTentative, permanent: 349 // The NIC already have a permanent endpoint with that address. 350 return nil, tcpip.ErrDuplicateAddress 351 case permanentExpired, temporary: 352 // Promote the endpoint to become permanent and respect 353 // the new peb. 354 if ref.tryIncRef() { 355 ref.setKind(permanent) 356 357 refs := n.primary[ref.protocol] 358 for i, r := range refs { 359 if r == ref { 360 switch peb { 361 case CanBePrimaryEndpoint: 362 return ref, nil 363 case FirstPrimaryEndpoint: 364 if i == 0 { 365 return ref, nil 366 } 367 n.primary[r.protocol] = append(refs[:i], refs[i+1:]...) 368 case NeverPrimaryEndpoint: 369 n.primary[r.protocol] = append(refs[:i], refs[i+1:]...) 370 return ref, nil 371 } 372 } 373 } 374 375 n.insertPrimaryEndpointLocked(ref, peb) 376 377 return ref, nil 378 } 379 // tryIncRef failing means the endpoint is scheduled to be removed once 380 // the lock is released. Remove it here so we can create a new 381 // (permanent) one. The removal logic waiting for the lock handles this 382 // case. 383 n.removeEndpointLocked(ref) 384 } 385 } 386 387 return n.addAddressLocked(protocolAddress, peb, permanent) 388 } 389 390 func (n *NIC) addAddressLocked(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior, kind networkEndpointKind) (*referencedNetworkEndpoint, *tcpip.Error) { 391 // TODO(b/141022673): Validate IP address before adding them. 392 393 // Sanity check. 394 id := NetworkEndpointID{protocolAddress.AddressWithPrefix.Address} 395 if _, ok := n.endpoints[id]; ok { 396 // Endpoint already exists. 397 return nil, tcpip.ErrDuplicateAddress 398 } 399 400 netProto, ok := n.stack.networkProtocols[protocolAddress.Protocol] 401 if !ok { 402 return nil, tcpip.ErrUnknownProtocol 403 } 404 405 // Create the new network endpoint. 406 ep, err := netProto.NewEndpoint(n.id, protocolAddress.AddressWithPrefix, n.stack, n, n.linkEP) 407 if err != nil { 408 return nil, err 409 } 410 411 isIPv6Unicast := protocolAddress.Protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(protocolAddress.AddressWithPrefix.Address) 412 413 // If the address is an IPv6 address and it is a permanent address, 414 // mark it as tentative so it goes through the DAD process. 415 if isIPv6Unicast && kind == permanent { 416 kind = permanentTentative 417 } 418 419 ref := &referencedNetworkEndpoint{ 420 refs: 1, 421 ep: ep, 422 nic: n, 423 protocol: protocolAddress.Protocol, 424 kind: kind, 425 } 426 427 // Set up cache if link address resolution exists for this protocol. 428 if n.linkEP.Capabilities()&CapabilityResolutionRequired != 0 { 429 if _, ok := n.stack.linkAddrResolvers[protocolAddress.Protocol]; ok { 430 ref.linkCache = n.stack 431 } 432 } 433 434 // If we are adding an IPv6 unicast address, join the solicited-node 435 // multicast address. 436 if isIPv6Unicast { 437 snmc := header.SolicitedNodeAddr(protocolAddress.AddressWithPrefix.Address) 438 if err := n.joinGroupLocked(protocolAddress.Protocol, snmc); err != nil { 439 return nil, err 440 } 441 } 442 443 n.endpoints[id] = ref 444 445 n.insertPrimaryEndpointLocked(ref, peb) 446 447 // If we are adding a tentative IPv6 address, start DAD. 448 if isIPv6Unicast && kind == permanentTentative { 449 if err := n.ndp.startDuplicateAddressDetection(protocolAddress.AddressWithPrefix.Address, ref); err != nil { 450 return nil, err 451 } 452 } 453 454 return ref, nil 455 } 456 457 // AddAddress adds a new address to n, so that it starts accepting packets 458 // targeted at the given address (and network protocol). 459 func (n *NIC) AddAddress(protocolAddress tcpip.ProtocolAddress, peb PrimaryEndpointBehavior) *tcpip.Error { 460 // Add the endpoint. 461 n.mu.Lock() 462 _, err := n.addPermanentAddressLocked(protocolAddress, peb) 463 n.mu.Unlock() 464 465 return err 466 } 467 468 // AllAddresses returns all addresses (primary and non-primary) associated with 469 // this NIC. 470 func (n *NIC) AllAddresses() []tcpip.ProtocolAddress { 471 n.mu.RLock() 472 defer n.mu.RUnlock() 473 474 addrs := make([]tcpip.ProtocolAddress, 0, len(n.endpoints)) 475 for nid, ref := range n.endpoints { 476 // Don't include tentative, expired or temporary endpoints to 477 // avoid confusion and prevent the caller from using those. 478 switch ref.getKind() { 479 case permanentTentative, permanentExpired, temporary: 480 // TODO(b/140898488): Should tentative addresses be 481 // returned? 482 continue 483 } 484 addrs = append(addrs, tcpip.ProtocolAddress{ 485 Protocol: ref.protocol, 486 AddressWithPrefix: tcpip.AddressWithPrefix{ 487 Address: nid.LocalAddress, 488 PrefixLen: ref.ep.PrefixLen(), 489 }, 490 }) 491 } 492 return addrs 493 } 494 495 // PrimaryAddresses returns the primary addresses associated with this NIC. 496 func (n *NIC) PrimaryAddresses() []tcpip.ProtocolAddress { 497 n.mu.RLock() 498 defer n.mu.RUnlock() 499 500 var addrs []tcpip.ProtocolAddress 501 for proto, list := range n.primary { 502 for _, ref := range list { 503 // Don't include tentative, expired or tempory endpoints 504 // to avoid confusion and prevent the caller from using 505 // those. 506 switch ref.getKind() { 507 case permanentTentative, permanentExpired, temporary: 508 continue 509 } 510 511 addrs = append(addrs, tcpip.ProtocolAddress{ 512 Protocol: proto, 513 AddressWithPrefix: tcpip.AddressWithPrefix{ 514 Address: ref.ep.ID().LocalAddress, 515 PrefixLen: ref.ep.PrefixLen(), 516 }, 517 }) 518 } 519 } 520 return addrs 521 } 522 523 // AddAddressRange adds a range of addresses to n, so that it starts accepting 524 // packets targeted at the given addresses and network protocol. The range is 525 // given by a subnet address, and all addresses contained in the subnet are 526 // used except for the subnet address itself and the subnet's broadcast 527 // address. 528 func (n *NIC) AddAddressRange(protocol tcpip.NetworkProtocolNumber, subnet tcpip.Subnet) { 529 n.mu.Lock() 530 n.addressRanges = append(n.addressRanges, subnet) 531 n.mu.Unlock() 532 } 533 534 // RemoveAddressRange removes the given address range from n. 535 func (n *NIC) RemoveAddressRange(subnet tcpip.Subnet) { 536 n.mu.Lock() 537 538 // Use the same underlying array. 539 tmp := n.addressRanges[:0] 540 for _, sub := range n.addressRanges { 541 if sub != subnet { 542 tmp = append(tmp, sub) 543 } 544 } 545 n.addressRanges = tmp 546 547 n.mu.Unlock() 548 } 549 550 // Subnets returns the Subnets associated with this NIC. 551 func (n *NIC) AddressRanges() []tcpip.Subnet { 552 n.mu.RLock() 553 defer n.mu.RUnlock() 554 sns := make([]tcpip.Subnet, 0, len(n.addressRanges)+len(n.endpoints)) 555 for nid := range n.endpoints { 556 sn, err := tcpip.NewSubnet(nid.LocalAddress, tcpip.AddressMask(strings.Repeat("\xff", len(nid.LocalAddress)))) 557 if err != nil { 558 // This should never happen as the mask has been carefully crafted to 559 // match the address. 560 panic("Invalid endpoint subnet: " + err.Error()) 561 } 562 sns = append(sns, sn) 563 } 564 return append(sns, n.addressRanges...) 565 } 566 567 // insertPrimaryEndpointLocked adds r to n's primary endpoint list as required 568 // by peb. 569 // 570 // n MUST be locked. 571 func (n *NIC) insertPrimaryEndpointLocked(r *referencedNetworkEndpoint, peb PrimaryEndpointBehavior) { 572 switch peb { 573 case CanBePrimaryEndpoint: 574 n.primary[r.protocol] = append(n.primary[r.protocol], r) 575 case FirstPrimaryEndpoint: 576 n.primary[r.protocol] = append([]*referencedNetworkEndpoint{r}, n.primary[r.protocol]...) 577 } 578 } 579 580 func (n *NIC) removeEndpointLocked(r *referencedNetworkEndpoint) { 581 id := *r.ep.ID() 582 583 // Nothing to do if the reference has already been replaced with a different 584 // one. This happens in the case where 1) this endpoint's ref count hit zero 585 // and was waiting (on the lock) to be removed and 2) the same address was 586 // re-added in the meantime by removing this endpoint from the list and 587 // adding a new one. 588 if n.endpoints[id] != r { 589 return 590 } 591 592 if r.getKind() == permanent { 593 panic("Reference count dropped to zero before being removed") 594 } 595 596 delete(n.endpoints, id) 597 refs := n.primary[r.protocol] 598 for i, ref := range refs { 599 if ref == r { 600 n.primary[r.protocol] = append(refs[:i], refs[i+1:]...) 601 break 602 } 603 } 604 605 r.ep.Close() 606 } 607 608 func (n *NIC) removeEndpoint(r *referencedNetworkEndpoint) { 609 n.mu.Lock() 610 n.removeEndpointLocked(r) 611 n.mu.Unlock() 612 } 613 614 func (n *NIC) removePermanentAddressLocked(addr tcpip.Address) *tcpip.Error { 615 r, ok := n.endpoints[NetworkEndpointID{addr}] 616 if !ok { 617 return tcpip.ErrBadLocalAddress 618 } 619 620 kind := r.getKind() 621 if kind != permanent && kind != permanentTentative { 622 return tcpip.ErrBadLocalAddress 623 } 624 625 isIPv6Unicast := r.protocol == header.IPv6ProtocolNumber && header.IsV6UnicastAddress(addr) 626 627 // If we are removing a tentative IPv6 unicast address, stop DAD. 628 if isIPv6Unicast && kind == permanentTentative { 629 n.ndp.stopDuplicateAddressDetection(addr) 630 } 631 632 r.setKind(permanentExpired) 633 if !r.decRefLocked() { 634 // The endpoint still has references to it. 635 return nil 636 } 637 638 // At this point the endpoint is deleted. 639 640 // If we are removing an IPv6 unicast address, leave the solicited-node 641 // multicast address. 642 if isIPv6Unicast { 643 snmc := header.SolicitedNodeAddr(addr) 644 if err := n.leaveGroupLocked(snmc); err != nil { 645 return err 646 } 647 } 648 649 return nil 650 } 651 652 // RemoveAddress removes an address from n. 653 func (n *NIC) RemoveAddress(addr tcpip.Address) *tcpip.Error { 654 n.mu.Lock() 655 defer n.mu.Unlock() 656 return n.removePermanentAddressLocked(addr) 657 } 658 659 // joinGroup adds a new endpoint for the given multicast address, if none 660 // exists yet. Otherwise it just increments its count. 661 func (n *NIC) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { 662 n.mu.Lock() 663 defer n.mu.Unlock() 664 665 return n.joinGroupLocked(protocol, addr) 666 } 667 668 // joinGroupLocked adds a new endpoint for the given multicast address, if none 669 // exists yet. Otherwise it just increments its count. n MUST be locked before 670 // joinGroupLocked is called. 671 func (n *NIC) joinGroupLocked(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) *tcpip.Error { 672 // TODO(b/143102137): When implementing MLD, make sure MLD packets are 673 // not sent unless a valid link-local address is available for use on n 674 // as an MLD packet's source address must be a link-local address as 675 // outlined in RFC 3810 section 5. 676 677 id := NetworkEndpointID{addr} 678 joins := n.mcastJoins[id] 679 if joins == 0 { 680 netProto, ok := n.stack.networkProtocols[protocol] 681 if !ok { 682 return tcpip.ErrUnknownProtocol 683 } 684 if _, err := n.addPermanentAddressLocked(tcpip.ProtocolAddress{ 685 Protocol: protocol, 686 AddressWithPrefix: tcpip.AddressWithPrefix{ 687 Address: addr, 688 PrefixLen: netProto.DefaultPrefixLen(), 689 }, 690 }, NeverPrimaryEndpoint); err != nil { 691 return err 692 } 693 } 694 n.mcastJoins[id] = joins + 1 695 return nil 696 } 697 698 // leaveGroup decrements the count for the given multicast address, and when it 699 // reaches zero removes the endpoint for this address. 700 func (n *NIC) leaveGroup(addr tcpip.Address) *tcpip.Error { 701 n.mu.Lock() 702 defer n.mu.Unlock() 703 704 return n.leaveGroupLocked(addr) 705 } 706 707 // leaveGroupLocked decrements the count for the given multicast address, and 708 // when it reaches zero removes the endpoint for this address. n MUST be locked 709 // before leaveGroupLocked is called. 710 func (n *NIC) leaveGroupLocked(addr tcpip.Address) *tcpip.Error { 711 id := NetworkEndpointID{addr} 712 joins := n.mcastJoins[id] 713 switch joins { 714 case 0: 715 // There are no joins with this address on this NIC. 716 return tcpip.ErrBadLocalAddress 717 case 1: 718 // This is the last one, clean up. 719 if err := n.removePermanentAddressLocked(addr); err != nil { 720 return err 721 } 722 } 723 n.mcastJoins[id] = joins - 1 724 return nil 725 } 726 727 func handlePacket(protocol tcpip.NetworkProtocolNumber, dst, src tcpip.Address, localLinkAddr, remotelinkAddr tcpip.LinkAddress, ref *referencedNetworkEndpoint, pkt tcpip.PacketBuffer) { 728 r := makeRoute(protocol, dst, src, localLinkAddr, ref, false /* handleLocal */, false /* multicastLoop */) 729 r.RemoteLinkAddress = remotelinkAddr 730 ref.ep.HandlePacket(&r, pkt) 731 ref.decRef() 732 } 733 734 // DeliverNetworkPacket finds the appropriate network protocol endpoint and 735 // hands the packet over for further processing. This function is called when 736 // the NIC receives a packet from the physical interface. 737 // Note that the ownership of the slice backing vv is retained by the caller. 738 // This rule applies only to the slice itself, not to the items of the slice; 739 // the ownership of the items is not retained by the caller. 740 func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.LinkAddress, protocol tcpip.NetworkProtocolNumber, pkt tcpip.PacketBuffer) { 741 n.stats.Rx.Packets.Increment() 742 n.stats.Rx.Bytes.IncrementBy(uint64(pkt.Data.Size())) 743 744 netProto, ok := n.stack.networkProtocols[protocol] 745 if !ok { 746 n.stack.stats.UnknownProtocolRcvdPackets.Increment() 747 return 748 } 749 750 // If no local link layer address is provided, assume it was sent 751 // directly to this NIC. 752 if local == "" { 753 local = n.linkEP.LinkAddress() 754 } 755 756 // Are any packet sockets listening for this network protocol? 757 n.mu.RLock() 758 packetEPs := n.packetEPs[protocol] 759 // Check whether there are packet sockets listening for every protocol. 760 // If we received a packet with protocol EthernetProtocolAll, then the 761 // previous for loop will have handled it. 762 if protocol != header.EthernetProtocolAll { 763 packetEPs = append(packetEPs, n.packetEPs[header.EthernetProtocolAll]...) 764 } 765 n.mu.RUnlock() 766 for _, ep := range packetEPs { 767 ep.HandlePacket(n.id, local, protocol, pkt.Clone()) 768 } 769 770 if netProto.Number() == header.IPv4ProtocolNumber || netProto.Number() == header.IPv6ProtocolNumber { 771 n.stack.stats.IP.PacketsReceived.Increment() 772 } 773 774 if len(pkt.Data.First()) < netProto.MinimumPacketSize() { 775 n.stack.stats.MalformedRcvdPackets.Increment() 776 return 777 } 778 779 src, dst := netProto.ParseAddresses(pkt.Data.First()) 780 781 if ref := n.getRef(protocol, dst); ref != nil { 782 handlePacket(protocol, dst, src, linkEP.LinkAddress(), remote, ref, pkt) 783 return 784 } 785 786 // This NIC doesn't care about the packet. Find a NIC that cares about the 787 // packet and forward it to the NIC. 788 // 789 // TODO: Should we be forwarding the packet even if promiscuous? 790 if n.stack.Forwarding() { 791 r, err := n.stack.FindRoute(0, "", dst, protocol, false /* multicastLoop */) 792 if err != nil { 793 n.stack.stats.IP.InvalidAddressesReceived.Increment() 794 return 795 } 796 defer r.Release() 797 798 r.LocalLinkAddress = n.linkEP.LinkAddress() 799 r.RemoteLinkAddress = remote 800 801 // Found a NIC. 802 n := r.ref.nic 803 n.mu.RLock() 804 ref, ok := n.endpoints[NetworkEndpointID{dst}] 805 ok = ok && ref.isValidForOutgoing() && ref.tryIncRef() 806 n.mu.RUnlock() 807 if ok { 808 r.RemoteAddress = src 809 // TODO(b/123449044): Update the source NIC as well. 810 ref.ep.HandlePacket(&r, pkt) 811 ref.decRef() 812 } else { 813 // n doesn't have a destination endpoint. 814 // Send the packet out of n. 815 pkt.Header = buffer.NewPrependableFromView(pkt.Data.First()) 816 pkt.Data.RemoveFirst() 817 818 // TODO(b/128629022): use route.WritePacket. 819 if err := n.linkEP.WritePacket(&r, nil /* gso */, protocol, pkt); err != nil { 820 r.Stats().IP.OutgoingPacketErrors.Increment() 821 } else { 822 n.stats.Tx.Packets.Increment() 823 n.stats.Tx.Bytes.IncrementBy(uint64(pkt.Header.UsedLength() + pkt.Data.Size())) 824 } 825 } 826 return 827 } 828 829 // If a packet socket handled the packet, don't treat it as invalid. 830 if len(packetEPs) == 0 { 831 n.stack.stats.IP.InvalidAddressesReceived.Increment() 832 } 833 } 834 835 // DeliverTransportPacket delivers the packets to the appropriate transport 836 // protocol endpoint. 837 func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolNumber, pkt tcpip.PacketBuffer) { 838 state, ok := n.stack.transportProtocols[protocol] 839 if !ok { 840 n.stack.stats.UnknownProtocolRcvdPackets.Increment() 841 return 842 } 843 844 transProto := state.proto 845 846 // Raw socket packets are delivered based solely on the transport 847 // protocol number. We do not inspect the payload to ensure it's 848 // validly formed. 849 n.stack.demux.deliverRawPacket(r, protocol, pkt) 850 851 if len(pkt.Data.First()) < transProto.MinimumPacketSize() { 852 n.stack.stats.MalformedRcvdPackets.Increment() 853 return 854 } 855 856 srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First()) 857 if err != nil { 858 n.stack.stats.MalformedRcvdPackets.Increment() 859 return 860 } 861 862 id := TransportEndpointID{dstPort, r.LocalAddress, srcPort, r.RemoteAddress} 863 if n.stack.demux.deliverPacket(r, protocol, pkt, id) { 864 return 865 } 866 867 // Try to deliver to per-stack default handler. 868 if state.defaultHandler != nil { 869 if state.defaultHandler(r, id, pkt) { 870 return 871 } 872 } 873 874 // We could not find an appropriate destination for this packet, so 875 // deliver it to the global handler. 876 if !transProto.HandleUnknownDestinationPacket(r, id, pkt) { 877 n.stack.stats.MalformedRcvdPackets.Increment() 878 } 879 } 880 881 // DeliverTransportControlPacket delivers control packets to the appropriate 882 // transport protocol endpoint. 883 func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, typ ControlType, extra uint32, pkt tcpip.PacketBuffer) { 884 state, ok := n.stack.transportProtocols[trans] 885 if !ok { 886 return 887 } 888 889 transProto := state.proto 890 891 // ICMPv4 only guarantees that 8 bytes of the transport protocol will 892 // be present in the payload. We know that the ports are within the 893 // first 8 bytes for all known transport protocols. 894 if len(pkt.Data.First()) < 8 { 895 return 896 } 897 898 srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First()) 899 if err != nil { 900 return 901 } 902 903 id := TransportEndpointID{srcPort, local, dstPort, remote} 904 if n.stack.demux.deliverControlPacket(n, net, trans, typ, extra, pkt, id) { 905 return 906 } 907 } 908 909 // ID returns the identifier of n. 910 func (n *NIC) ID() tcpip.NICID { 911 return n.id 912 } 913 914 // Stack returns the instance of the Stack that owns this NIC. 915 func (n *NIC) Stack() *Stack { 916 return n.stack 917 } 918 919 // isAddrTentative returns true if addr is tentative on n. 920 // 921 // Note that if addr is not associated with n, then this function will return 922 // false. It will only return true if the address is associated with the NIC 923 // AND it is tentative. 924 func (n *NIC) isAddrTentative(addr tcpip.Address) bool { 925 ref, ok := n.endpoints[NetworkEndpointID{addr}] 926 if !ok { 927 return false 928 } 929 930 return ref.getKind() == permanentTentative 931 } 932 933 // dupTentativeAddrDetected attempts to inform n that a tentative addr 934 // is a duplicate on a link. 935 // 936 // dupTentativeAddrDetected will delete the tentative address if it exists. 937 func (n *NIC) dupTentativeAddrDetected(addr tcpip.Address) *tcpip.Error { 938 n.mu.Lock() 939 defer n.mu.Unlock() 940 941 ref, ok := n.endpoints[NetworkEndpointID{addr}] 942 if !ok { 943 return tcpip.ErrBadAddress 944 } 945 946 if ref.getKind() != permanentTentative { 947 return tcpip.ErrInvalidEndpointState 948 } 949 950 return n.removePermanentAddressLocked(addr) 951 } 952 953 // setNDPConfigs sets the NDP configurations for n. 954 // 955 // Note, if c contains invalid NDP configuration values, it will be fixed to 956 // use default values for the erroneous values. 957 func (n *NIC) setNDPConfigs(c NDPConfigurations) { 958 c.validate() 959 960 n.mu.Lock() 961 n.ndp.configs = c 962 n.mu.Unlock() 963 } 964 965 // handleNDPRA handles an NDP Router Advertisement message that arrived on n. 966 func (n *NIC) handleNDPRA(ip tcpip.Address, ra header.NDPRouterAdvert) { 967 n.mu.Lock() 968 defer n.mu.Unlock() 969 970 n.ndp.handleRA(ip, ra) 971 } 972 973 type networkEndpointKind int32 974 975 const ( 976 // A permanentTentative endpoint is a permanent address that is not yet 977 // considered to be fully bound to an interface in the traditional 978 // sense. That is, the address is associated with a NIC, but packets 979 // destined to the address MUST NOT be accepted and MUST be silently 980 // dropped, and the address MUST NOT be used as a source address for 981 // outgoing packets. For IPv6, addresses will be of this kind until 982 // NDP's Duplicate Address Detection has resolved, or be deleted if 983 // the process results in detecting a duplicate address. 984 permanentTentative networkEndpointKind = iota 985 986 // A permanent endpoint is created by adding a permanent address (vs. a 987 // temporary one) to the NIC. Its reference count is biased by 1 to avoid 988 // removal when no route holds a reference to it. It is removed by explicitly 989 // removing the permanent address from the NIC. 990 permanent 991 992 // An expired permanent endoint is a permanent endoint that had its address 993 // removed from the NIC, and it is waiting to be removed once no more routes 994 // hold a reference to it. This is achieved by decreasing its reference count 995 // by 1. If its address is re-added before the endpoint is removed, its type 996 // changes back to permanent and its reference count increases by 1 again. 997 permanentExpired 998 999 // A temporary endpoint is created for spoofing outgoing packets, or when in 1000 // promiscuous mode and accepting incoming packets that don't match any 1001 // permanent endpoint. Its reference count is not biased by 1 and the 1002 // endpoint is removed immediately when no more route holds a reference to 1003 // it. A temporary endpoint can be promoted to permanent if its address 1004 // is added permanently. 1005 temporary 1006 ) 1007 1008 func (n *NIC) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) *tcpip.Error { 1009 n.mu.Lock() 1010 defer n.mu.Unlock() 1011 1012 eps, ok := n.packetEPs[netProto] 1013 if !ok { 1014 return tcpip.ErrNotSupported 1015 } 1016 n.packetEPs[netProto] = append(eps, ep) 1017 1018 return nil 1019 } 1020 1021 func (n *NIC) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) { 1022 n.mu.Lock() 1023 defer n.mu.Unlock() 1024 1025 eps, ok := n.packetEPs[netProto] 1026 if !ok { 1027 return 1028 } 1029 1030 for i, epOther := range eps { 1031 if epOther == ep { 1032 n.packetEPs[netProto] = append(eps[:i], eps[i+1:]...) 1033 return 1034 } 1035 } 1036 } 1037 1038 type referencedNetworkEndpoint struct { 1039 ep NetworkEndpoint 1040 nic *NIC 1041 protocol tcpip.NetworkProtocolNumber 1042 1043 // linkCache is set if link address resolution is enabled for this 1044 // protocol. Set to nil otherwise. 1045 linkCache LinkAddressCache 1046 1047 // refs is counting references held for this endpoint. When refs hits zero it 1048 // triggers the automatic removal of the endpoint from the NIC. 1049 refs int32 1050 1051 // networkEndpointKind must only be accessed using {get,set}Kind(). 1052 kind networkEndpointKind 1053 } 1054 1055 func (r *referencedNetworkEndpoint) getKind() networkEndpointKind { 1056 return networkEndpointKind(atomic.LoadInt32((*int32)(&r.kind))) 1057 } 1058 1059 func (r *referencedNetworkEndpoint) setKind(kind networkEndpointKind) { 1060 atomic.StoreInt32((*int32)(&r.kind), int32(kind)) 1061 } 1062 1063 // isValidForOutgoing returns true if the endpoint can be used to send out a 1064 // packet. It requires the endpoint to not be marked expired (i.e., its address 1065 // has been removed), or the NIC to be in spoofing mode. 1066 func (r *referencedNetworkEndpoint) isValidForOutgoing() bool { 1067 return r.getKind() != permanentExpired || r.nic.spoofing 1068 } 1069 1070 // isValidForIncoming returns true if the endpoint can accept an incoming 1071 // packet. It requires the endpoint to not be marked expired (i.e., its address 1072 // has been removed), or the NIC to be in promiscuous mode. 1073 func (r *referencedNetworkEndpoint) isValidForIncoming() bool { 1074 return r.getKind() != permanentExpired || r.nic.promiscuous 1075 } 1076 1077 // decRef decrements the ref count and cleans up the endpoint once it reaches 1078 // zero. 1079 func (r *referencedNetworkEndpoint) decRef() { 1080 if atomic.AddInt32(&r.refs, -1) == 0 { 1081 r.nic.removeEndpoint(r) 1082 } 1083 } 1084 1085 // decRefLocked is the same as decRef but assumes that the NIC.mu mutex is 1086 // locked. Returns true if the endpoint was removed. 1087 func (r *referencedNetworkEndpoint) decRefLocked() bool { 1088 if atomic.AddInt32(&r.refs, -1) == 0 { 1089 r.nic.removeEndpointLocked(r) 1090 return true 1091 } 1092 1093 return false 1094 } 1095 1096 // incRef increments the ref count. It must only be called when the caller is 1097 // known to be holding a reference to the endpoint, otherwise tryIncRef should 1098 // be used. 1099 func (r *referencedNetworkEndpoint) incRef() { 1100 atomic.AddInt32(&r.refs, 1) 1101 } 1102 1103 // tryIncRef attempts to increment the ref count from n to n+1, but only if n is 1104 // not zero. That is, it will increment the count if the endpoint is still 1105 // alive, and do nothing if it has already been clean up. 1106 func (r *referencedNetworkEndpoint) tryIncRef() bool { 1107 for { 1108 v := atomic.LoadInt32(&r.refs) 1109 if v == 0 { 1110 return false 1111 } 1112 1113 if atomic.CompareAndSwapInt32(&r.refs, v, v+1) { 1114 return true 1115 } 1116 } 1117 } 1118 1119 // stack returns the Stack instance that owns the underlying endpoint. 1120 func (r *referencedNetworkEndpoint) stack() *Stack { 1121 return r.nic.stack 1122 }