github.com/openshift/dpu-operator@v0.0.0-20240502153209-3af840d137c2/dpu-cni/pkgs/sriovutils/packet.go (about)

     1  package sriovutils
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"net"
     8  	"syscall"
     9  
    10  	current "github.com/containernetworking/cni/pkg/types/100"
    11  	"github.com/vishvananda/netlink"
    12  	"golang.org/x/net/icmp"
    13  	"golang.org/x/net/ipv6"
    14  )
    15  
    16  var (
    17  	arpPacketName    = "ARP"
    18  	icmpV6PacketName = "ICMPv6"
    19  )
    20  
    21  // htons converts an uint16 from host to network byte order.
    22  func htons(i uint16) uint16 {
    23  	return (i<<8)&0xff00 | i>>8
    24  }
    25  
    26  // formatPacketFieldWriteError builds an error string for the cases when writing to a field of a packet fails.
    27  func formatPacketFieldWriteError(field string, packetType string, writeErr error) error {
    28  	return fmt.Errorf("failed to write the %s field in the %s packet: %v", field, packetType, writeErr)
    29  }
    30  
    31  // SendGratuitousArp sends a gratuitous ARP packet with the provided source IP over the provided interface.
    32  func SendGratuitousArp(srcIP net.IP, linkObj netlink.Link) error {
    33  	/* As per RFC 5944 section 4.6, a gratuitous ARP packet can be sent by a node in order to spontaneously cause other nodes to update
    34  	 * an entry in their ARP cache. In the case of SRIOV-CNI, an address can be reused for different pods. Each pod could likely have a
    35  	 * different link-layer address in this scenario, which makes the ARP cache entries residing in the other nodes to be an invalid.
    36  	 * The gratuitous ARP packet should update the link-layer address accordingly for the invalid ARP cache.
    37  	 */
    38  
    39  	// Construct the ARP packet following RFC 5944 section 4.6.
    40  	arpPacket := new(bytes.Buffer)
    41  	if writeErr := binary.Write(arpPacket, binary.BigEndian, uint16(1)); writeErr != nil { // Hardware Type: 1 is Ethernet
    42  		return formatPacketFieldWriteError("Hardware Type", arpPacketName, writeErr)
    43  	}
    44  	if writeErr := binary.Write(arpPacket, binary.BigEndian, uint16(syscall.ETH_P_IP)); writeErr != nil { // Protocol Type: 0x0800 is IPv4
    45  		return formatPacketFieldWriteError("Protocol Type", arpPacketName, writeErr)
    46  	}
    47  	if writeErr := binary.Write(arpPacket, binary.BigEndian, uint8(6)); writeErr != nil { // Hardware address Length: 6 bytes for MAC address
    48  		return formatPacketFieldWriteError("Hardware address Length", arpPacketName, writeErr)
    49  	}
    50  	if writeErr := binary.Write(arpPacket, binary.BigEndian, uint8(4)); writeErr != nil { // Protocol address length: 4 bytes for IPv4 address
    51  		return formatPacketFieldWriteError("Protocol address length", arpPacketName, writeErr)
    52  	}
    53  	if writeErr := binary.Write(arpPacket, binary.BigEndian, uint16(1)); writeErr != nil { // Operation: 1 is request, 2 is response
    54  		return formatPacketFieldWriteError("Operation", arpPacketName, writeErr)
    55  	}
    56  	if _, writeErr := arpPacket.Write(linkObj.Attrs().HardwareAddr); writeErr != nil { // Sender hardware address
    57  		return formatPacketFieldWriteError("Sender hardware address", arpPacketName, writeErr)
    58  	}
    59  	if _, writeErr := arpPacket.Write(srcIP.To4()); writeErr != nil { // Sender protocol address
    60  		return formatPacketFieldWriteError("Sender protocol address", arpPacketName, writeErr)
    61  	}
    62  	if _, writeErr := arpPacket.Write([]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}); writeErr != nil { // Target hardware address is the Broadcast MAC.
    63  		return formatPacketFieldWriteError("Target hardware address", arpPacketName, writeErr)
    64  	}
    65  	if _, writeErr := arpPacket.Write(srcIP.To4()); writeErr != nil { // Target protocol address
    66  		return formatPacketFieldWriteError("Target protocol address", arpPacketName, writeErr)
    67  	}
    68  
    69  	sockAddr := syscall.SockaddrLinklayer{
    70  		Protocol: htons(syscall.ETH_P_ARP),                                // Ethertype of ARP (0x0806)
    71  		Ifindex:  linkObj.Attrs().Index,                                   // Interface Index
    72  		Hatype:   1,                                                       // Hardware Type: 1 is Ethernet
    73  		Pkttype:  0,                                                       // Packet Type.
    74  		Halen:    6,                                                       // Hardware address Length: 6 bytes for MAC address
    75  		Addr:     [8]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, // Address is the broadcast MAC address.
    76  	}
    77  
    78  	// Create a socket such that the Ethernet header would constructed by the OS. The arpPacket only contains the ARP payload.
    79  	soc, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_DGRAM, int(htons(syscall.ETH_P_ARP)))
    80  	if err != nil {
    81  		return fmt.Errorf("failed to create AF_PACKET datagram socket: %v", err)
    82  	}
    83  	defer syscall.Close(soc)
    84  
    85  	if err := syscall.Sendto(soc, arpPacket.Bytes(), 0, &sockAddr); err != nil {
    86  		return fmt.Errorf("failed to send Gratuitous ARP for IPv4 %s on Interface %s: %v", srcIP.String(), linkObj.Attrs().Name, err)
    87  	}
    88  
    89  	return nil
    90  }
    91  
    92  // SendUnsolicitedNeighborAdvertisement sends an unsolicited neighbor advertisement packet with the provided source IP over the provided interface.
    93  func SendUnsolicitedNeighborAdvertisement(srcIP net.IP, linkObj netlink.Link) error {
    94  	/* As per RFC 4861, a link-layer address change can multicast a few unsolicited neighbor advertisements to all nodes to quickly
    95  	 * update the cached link-layer addresses that have become invalid. In the case of SRIOV-CNI, an address can be reused for
    96  	 * different pods. Each pod could likely have a different link-layer address in this scenario, which makes the Neighbor Cache
    97  	 * entries residing in the neighbors to be an invalid. The unsolicited neighbor advertisement should update the link-layer address
    98  	 * accordingly for the IPv6 entry.
    99  	 * However if any of these conditions are true:
   100  	 *  - The IPv6 address was not reused for the new pod.
   101  	 *  - No prior established communication with the neighbor.
   102  	 * Then the neighbor receiving this unsolicited neighbor advertisement would be silently discard. This behavior is described
   103  	 * in RFC 4861 section 7.2.5. This is acceptable behavior since the purpose of sending an unsolicited neighbor advertisement
   104  	 * is not to create a new entry but rather update already existing invalid entries.
   105  	 */
   106  
   107  	// Construct the ICMPv6 Neighbor Advertisement packet following RFC 4861.
   108  	payload := new(bytes.Buffer)
   109  	// ICMPv6 Flags: As per RFC 4861, the solicited flag must not be set and the override flag should be set (to
   110  	// override existing cache entry) for unsolicited advertisements.
   111  	if writeErr := binary.Write(payload, binary.BigEndian, uint32(0x20000000)); writeErr != nil {
   112  		return formatPacketFieldWriteError("Flags", icmpV6PacketName, writeErr)
   113  	}
   114  	if _, writeErr := payload.Write(srcIP.To16()); writeErr != nil { // ICMPv6 Target IPv6 Address.
   115  		return formatPacketFieldWriteError("Target IPv6 Address", icmpV6PacketName, writeErr)
   116  	}
   117  	if writeErr := binary.Write(payload, binary.BigEndian, uint8(2)); writeErr != nil { // ICMPv6 Option Type: 2 is target link-layer address.
   118  		return formatPacketFieldWriteError("Option Type", icmpV6PacketName, writeErr)
   119  	}
   120  	if writeErr := binary.Write(payload, binary.BigEndian, uint8(1)); writeErr != nil { // ICMPv6 Option Length. Units of 8 bytes.
   121  		return formatPacketFieldWriteError("Option Length", icmpV6PacketName, writeErr)
   122  	}
   123  	if _, writeErr := payload.Write(linkObj.Attrs().HardwareAddr); writeErr != nil { // ICMPv6 Option Link-layer Address.
   124  		return formatPacketFieldWriteError("Option Link-layer Address", icmpV6PacketName, writeErr)
   125  	}
   126  
   127  	icmpv6Msg := icmp.Message{
   128  		Type:     ipv6.ICMPTypeNeighborAdvertisement, // ICMPv6 type is neighbor advertisement.
   129  		Code:     0,                                  // ICMPv6 Code: As per RFC 4861 section 7.1.2, the code is always 0.
   130  		Checksum: 0,                                  // Checksum is calculated later.
   131  		Body: &icmp.RawBody{
   132  			Data: payload.Bytes(),
   133  		},
   134  	}
   135  
   136  	// Get the byte array of the ICMPv6 Message.
   137  	icmpv6Bytes, err := icmpv6Msg.Marshal(nil)
   138  	if err != nil {
   139  		return fmt.Errorf("failed to Marshal ICMPv6 Message: %v", err)
   140  	}
   141  
   142  	// Create a socket such that the Ethernet header and IPv6 header would constructed by the OS.
   143  	soc, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_ICMPV6)
   144  	if err != nil {
   145  		return fmt.Errorf("failed to create AF_INET6 raw socket: %v", err)
   146  	}
   147  	defer syscall.Close(soc)
   148  
   149  	// As per RFC 4861 section 7.1.2, the IPv6 hop limit is always 255.
   150  	if err := syscall.SetsockoptInt(soc, syscall.IPPROTO_IPV6, syscall.IPV6_MULTICAST_HOPS, 255); err != nil {
   151  		return fmt.Errorf("failed to set IPv6 multicast hops to 255: %v", err)
   152  	}
   153  
   154  	// Set the destination IPv6 address to the IPv6 link-local all nodes multicast address (ff02::1).
   155  	var r [16]byte
   156  	copy(r[:], net.IPv6linklocalallnodes.To16())
   157  	sockAddr := syscall.SockaddrInet6{Addr: r}
   158  	if err := syscall.Sendto(soc, icmpv6Bytes, 0, &sockAddr); err != nil {
   159  		return fmt.Errorf("failed to send Unsolicited Neighbor Advertisement for IPv6 %s on Interface %s: %v", srcIP.String(), linkObj.Attrs().Name, err)
   160  	}
   161  
   162  	return nil
   163  }
   164  
   165  // AnnounceIPs sends either a GARP or Unsolicited NA depending on the IP address type (IPv4 vs. IPv6 respectively) configured on the interface.
   166  func AnnounceIPs(ifName string, ipConfigs []*current.IPConfig) error {
   167  	myNetLink := MyNetlink{}
   168  
   169  	// Retrieve the interface name in the container.
   170  	linkObj, err := myNetLink.LinkByName(ifName)
   171  	if err != nil {
   172  		return fmt.Errorf("failed to get netlink device with name %q: %v", ifName, err)
   173  	}
   174  	if !IsValidMACAddress(linkObj.Attrs().HardwareAddr) {
   175  		return fmt.Errorf("invalid Ethernet MAC address: %q", linkObj.Attrs().HardwareAddr)
   176  	}
   177  
   178  	// For all the IP addresses assigned by IPAM, we will send either a GARP (IPv4) or Unsolicited NA (IPv6).
   179  	for _, ipc := range ipConfigs {
   180  		var err error
   181  		if IsIPv6(ipc.Address.IP) {
   182  			/* As per RFC 4861, sending unsolicited neighbor advertisements should be considered as a performance
   183  			* optimization. It does not reliably update caches in all nodes. The Neighbor Unreachability Detection
   184  			* algorithm is more reliable although it may take slightly longer to update.
   185  			 */
   186  			err = SendUnsolicitedNeighborAdvertisement(ipc.Address.IP, linkObj)
   187  		} else if IsIPv4(ipc.Address.IP) {
   188  			err = SendGratuitousArp(ipc.Address.IP, linkObj)
   189  		} else {
   190  			return fmt.Errorf("the IP %s on interface %q is neither IPv4 or IPv6", ipc.Address.IP.String(), ifName)
   191  		}
   192  
   193  		if err != nil {
   194  			return fmt.Errorf("failed to send GARP/NA message for ip %s on interface %q: %v", ipc.Address.IP.String(), ifName, err)
   195  		}
   196  	}
   197  	return nil
   198  }