github.com/openshift/dpu-operator@v0.0.0-20240502153209-3af840d137c2/dpu-cni/pkgs/sriovutils/packet.go (about) 1 package sriovutils 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "fmt" 7 "net" 8 "syscall" 9 10 current "github.com/containernetworking/cni/pkg/types/100" 11 "github.com/vishvananda/netlink" 12 "golang.org/x/net/icmp" 13 "golang.org/x/net/ipv6" 14 ) 15 16 var ( 17 arpPacketName = "ARP" 18 icmpV6PacketName = "ICMPv6" 19 ) 20 21 // htons converts an uint16 from host to network byte order. 22 func htons(i uint16) uint16 { 23 return (i<<8)&0xff00 | i>>8 24 } 25 26 // formatPacketFieldWriteError builds an error string for the cases when writing to a field of a packet fails. 27 func formatPacketFieldWriteError(field string, packetType string, writeErr error) error { 28 return fmt.Errorf("failed to write the %s field in the %s packet: %v", field, packetType, writeErr) 29 } 30 31 // SendGratuitousArp sends a gratuitous ARP packet with the provided source IP over the provided interface. 32 func SendGratuitousArp(srcIP net.IP, linkObj netlink.Link) error { 33 /* As per RFC 5944 section 4.6, a gratuitous ARP packet can be sent by a node in order to spontaneously cause other nodes to update 34 * an entry in their ARP cache. In the case of SRIOV-CNI, an address can be reused for different pods. Each pod could likely have a 35 * different link-layer address in this scenario, which makes the ARP cache entries residing in the other nodes to be an invalid. 36 * The gratuitous ARP packet should update the link-layer address accordingly for the invalid ARP cache. 37 */ 38 39 // Construct the ARP packet following RFC 5944 section 4.6. 40 arpPacket := new(bytes.Buffer) 41 if writeErr := binary.Write(arpPacket, binary.BigEndian, uint16(1)); writeErr != nil { // Hardware Type: 1 is Ethernet 42 return formatPacketFieldWriteError("Hardware Type", arpPacketName, writeErr) 43 } 44 if writeErr := binary.Write(arpPacket, binary.BigEndian, uint16(syscall.ETH_P_IP)); writeErr != nil { // Protocol Type: 0x0800 is IPv4 45 return formatPacketFieldWriteError("Protocol Type", arpPacketName, writeErr) 46 } 47 if writeErr := binary.Write(arpPacket, binary.BigEndian, uint8(6)); writeErr != nil { // Hardware address Length: 6 bytes for MAC address 48 return formatPacketFieldWriteError("Hardware address Length", arpPacketName, writeErr) 49 } 50 if writeErr := binary.Write(arpPacket, binary.BigEndian, uint8(4)); writeErr != nil { // Protocol address length: 4 bytes for IPv4 address 51 return formatPacketFieldWriteError("Protocol address length", arpPacketName, writeErr) 52 } 53 if writeErr := binary.Write(arpPacket, binary.BigEndian, uint16(1)); writeErr != nil { // Operation: 1 is request, 2 is response 54 return formatPacketFieldWriteError("Operation", arpPacketName, writeErr) 55 } 56 if _, writeErr := arpPacket.Write(linkObj.Attrs().HardwareAddr); writeErr != nil { // Sender hardware address 57 return formatPacketFieldWriteError("Sender hardware address", arpPacketName, writeErr) 58 } 59 if _, writeErr := arpPacket.Write(srcIP.To4()); writeErr != nil { // Sender protocol address 60 return formatPacketFieldWriteError("Sender protocol address", arpPacketName, writeErr) 61 } 62 if _, writeErr := arpPacket.Write([]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}); writeErr != nil { // Target hardware address is the Broadcast MAC. 63 return formatPacketFieldWriteError("Target hardware address", arpPacketName, writeErr) 64 } 65 if _, writeErr := arpPacket.Write(srcIP.To4()); writeErr != nil { // Target protocol address 66 return formatPacketFieldWriteError("Target protocol address", arpPacketName, writeErr) 67 } 68 69 sockAddr := syscall.SockaddrLinklayer{ 70 Protocol: htons(syscall.ETH_P_ARP), // Ethertype of ARP (0x0806) 71 Ifindex: linkObj.Attrs().Index, // Interface Index 72 Hatype: 1, // Hardware Type: 1 is Ethernet 73 Pkttype: 0, // Packet Type. 74 Halen: 6, // Hardware address Length: 6 bytes for MAC address 75 Addr: [8]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, // Address is the broadcast MAC address. 76 } 77 78 // Create a socket such that the Ethernet header would constructed by the OS. The arpPacket only contains the ARP payload. 79 soc, err := syscall.Socket(syscall.AF_PACKET, syscall.SOCK_DGRAM, int(htons(syscall.ETH_P_ARP))) 80 if err != nil { 81 return fmt.Errorf("failed to create AF_PACKET datagram socket: %v", err) 82 } 83 defer syscall.Close(soc) 84 85 if err := syscall.Sendto(soc, arpPacket.Bytes(), 0, &sockAddr); err != nil { 86 return fmt.Errorf("failed to send Gratuitous ARP for IPv4 %s on Interface %s: %v", srcIP.String(), linkObj.Attrs().Name, err) 87 } 88 89 return nil 90 } 91 92 // SendUnsolicitedNeighborAdvertisement sends an unsolicited neighbor advertisement packet with the provided source IP over the provided interface. 93 func SendUnsolicitedNeighborAdvertisement(srcIP net.IP, linkObj netlink.Link) error { 94 /* As per RFC 4861, a link-layer address change can multicast a few unsolicited neighbor advertisements to all nodes to quickly 95 * update the cached link-layer addresses that have become invalid. In the case of SRIOV-CNI, an address can be reused for 96 * different pods. Each pod could likely have a different link-layer address in this scenario, which makes the Neighbor Cache 97 * entries residing in the neighbors to be an invalid. The unsolicited neighbor advertisement should update the link-layer address 98 * accordingly for the IPv6 entry. 99 * However if any of these conditions are true: 100 * - The IPv6 address was not reused for the new pod. 101 * - No prior established communication with the neighbor. 102 * Then the neighbor receiving this unsolicited neighbor advertisement would be silently discard. This behavior is described 103 * in RFC 4861 section 7.2.5. This is acceptable behavior since the purpose of sending an unsolicited neighbor advertisement 104 * is not to create a new entry but rather update already existing invalid entries. 105 */ 106 107 // Construct the ICMPv6 Neighbor Advertisement packet following RFC 4861. 108 payload := new(bytes.Buffer) 109 // ICMPv6 Flags: As per RFC 4861, the solicited flag must not be set and the override flag should be set (to 110 // override existing cache entry) for unsolicited advertisements. 111 if writeErr := binary.Write(payload, binary.BigEndian, uint32(0x20000000)); writeErr != nil { 112 return formatPacketFieldWriteError("Flags", icmpV6PacketName, writeErr) 113 } 114 if _, writeErr := payload.Write(srcIP.To16()); writeErr != nil { // ICMPv6 Target IPv6 Address. 115 return formatPacketFieldWriteError("Target IPv6 Address", icmpV6PacketName, writeErr) 116 } 117 if writeErr := binary.Write(payload, binary.BigEndian, uint8(2)); writeErr != nil { // ICMPv6 Option Type: 2 is target link-layer address. 118 return formatPacketFieldWriteError("Option Type", icmpV6PacketName, writeErr) 119 } 120 if writeErr := binary.Write(payload, binary.BigEndian, uint8(1)); writeErr != nil { // ICMPv6 Option Length. Units of 8 bytes. 121 return formatPacketFieldWriteError("Option Length", icmpV6PacketName, writeErr) 122 } 123 if _, writeErr := payload.Write(linkObj.Attrs().HardwareAddr); writeErr != nil { // ICMPv6 Option Link-layer Address. 124 return formatPacketFieldWriteError("Option Link-layer Address", icmpV6PacketName, writeErr) 125 } 126 127 icmpv6Msg := icmp.Message{ 128 Type: ipv6.ICMPTypeNeighborAdvertisement, // ICMPv6 type is neighbor advertisement. 129 Code: 0, // ICMPv6 Code: As per RFC 4861 section 7.1.2, the code is always 0. 130 Checksum: 0, // Checksum is calculated later. 131 Body: &icmp.RawBody{ 132 Data: payload.Bytes(), 133 }, 134 } 135 136 // Get the byte array of the ICMPv6 Message. 137 icmpv6Bytes, err := icmpv6Msg.Marshal(nil) 138 if err != nil { 139 return fmt.Errorf("failed to Marshal ICMPv6 Message: %v", err) 140 } 141 142 // Create a socket such that the Ethernet header and IPv6 header would constructed by the OS. 143 soc, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_RAW, syscall.IPPROTO_ICMPV6) 144 if err != nil { 145 return fmt.Errorf("failed to create AF_INET6 raw socket: %v", err) 146 } 147 defer syscall.Close(soc) 148 149 // As per RFC 4861 section 7.1.2, the IPv6 hop limit is always 255. 150 if err := syscall.SetsockoptInt(soc, syscall.IPPROTO_IPV6, syscall.IPV6_MULTICAST_HOPS, 255); err != nil { 151 return fmt.Errorf("failed to set IPv6 multicast hops to 255: %v", err) 152 } 153 154 // Set the destination IPv6 address to the IPv6 link-local all nodes multicast address (ff02::1). 155 var r [16]byte 156 copy(r[:], net.IPv6linklocalallnodes.To16()) 157 sockAddr := syscall.SockaddrInet6{Addr: r} 158 if err := syscall.Sendto(soc, icmpv6Bytes, 0, &sockAddr); err != nil { 159 return fmt.Errorf("failed to send Unsolicited Neighbor Advertisement for IPv6 %s on Interface %s: %v", srcIP.String(), linkObj.Attrs().Name, err) 160 } 161 162 return nil 163 } 164 165 // AnnounceIPs sends either a GARP or Unsolicited NA depending on the IP address type (IPv4 vs. IPv6 respectively) configured on the interface. 166 func AnnounceIPs(ifName string, ipConfigs []*current.IPConfig) error { 167 myNetLink := MyNetlink{} 168 169 // Retrieve the interface name in the container. 170 linkObj, err := myNetLink.LinkByName(ifName) 171 if err != nil { 172 return fmt.Errorf("failed to get netlink device with name %q: %v", ifName, err) 173 } 174 if !IsValidMACAddress(linkObj.Attrs().HardwareAddr) { 175 return fmt.Errorf("invalid Ethernet MAC address: %q", linkObj.Attrs().HardwareAddr) 176 } 177 178 // For all the IP addresses assigned by IPAM, we will send either a GARP (IPv4) or Unsolicited NA (IPv6). 179 for _, ipc := range ipConfigs { 180 var err error 181 if IsIPv6(ipc.Address.IP) { 182 /* As per RFC 4861, sending unsolicited neighbor advertisements should be considered as a performance 183 * optimization. It does not reliably update caches in all nodes. The Neighbor Unreachability Detection 184 * algorithm is more reliable although it may take slightly longer to update. 185 */ 186 err = SendUnsolicitedNeighborAdvertisement(ipc.Address.IP, linkObj) 187 } else if IsIPv4(ipc.Address.IP) { 188 err = SendGratuitousArp(ipc.Address.IP, linkObj) 189 } else { 190 return fmt.Errorf("the IP %s on interface %q is neither IPv4 or IPv6", ipc.Address.IP.String(), ifName) 191 } 192 193 if err != nil { 194 return fmt.Errorf("failed to send GARP/NA message for ip %s on interface %q: %v", ipc.Address.IP.String(), ifName, err) 195 } 196 } 197 return nil 198 }