github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgets/trace/dns/tracer/bpf/dns.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 The Inspektor Gadget authors */

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/udp.h>
#include <sys/socket.h>
#include <stdbool.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define GADGET_TYPE_NETWORKING
#include <gadget/sockets-map.h>

#include "dns-common.h"

#ifndef PACKET_HOST
#define PACKET_HOST 0x0
#endif

#ifndef PACKET_OUTGOING
#define PACKET_OUTGOING 0x4
#endif

#define DNS_QR_QUERY 0
#define DNS_QR_RESP 1

#define MAX_PORTS 16
const volatile __u16 ports[MAX_PORTS] = { 53, 5353 };
const volatile __u16 ports_len = 2;

static __always_inline bool is_dns_port(__u16 port)
{
	for (int i = 0; i < ports_len; i++) {
		if (ports[i] == port)
			return true;
	}
	return false;
}

// we need this to make sure the compiler doesn't remove our struct
const struct event_t *unusedevent __attribute__((unused));

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} events SEC(".maps");

// https://datatracker.ietf.org/doc/html/rfc1035#section-4.1.1
union dnsflags {
	struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
		__u8 rcode : 4; // response code
		__u8 z : 3; // reserved
		__u8 ra : 1; // recursion available
		__u8 rd : 1; // recursion desired
		__u8 tc : 1; // truncation
		__u8 aa : 1; // authoritative answer
		__u8 opcode : 4; // kind of query
		__u8 qr : 1; // 0=query; 1=response
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		__u8 qr : 1; // 0=query; 1=response
		__u8 opcode : 4; // kind of query
		__u8 aa : 1; // authoritative answer
		__u8 tc : 1; // truncation
		__u8 rd : 1; // recursion desired
		__u8 ra : 1; // recursion available
		__u8 z : 3; // reserved
		__u8 rcode : 4; // response code
#else
#error "Fix your compiler's __BYTE_ORDER__?!"
#endif
	};
	__u16 flags;
};

struct dnshdr {
	__u16 id;

	union dnsflags flags;

	__u16 qdcount; // number of question entries
	__u16 ancount; // number of answer entries
	__u16 nscount; // number of authority records
	__u16 arcount; // number of additional records
};

// Map of DNS query to timestamp so we can calculate latency from query sent to answer received.
struct query_key_t {
	__u64 pid_tgid;
	__u16 id;
	__u16 pad[3]; // this is needed, otherwise the verifier claims an invalid read from stack
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct query_key_t);
	__type(value, __u64); // timestamp of the query
	__uint(max_entries, 1024);
} query_map SEC(".maps");

SEC("socket1")
int ig_trace_dns(struct __sk_buff *skb)
{
	struct event_t event;
	__u16 sport, dport, l4_off, dns_off, h_proto, id;
	__u8 proto;
	int i;

	// Do a first pass only to extract the port and drop the packet if it's not DNS
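	// Note: load_byte()/load_half()/load_word() read from the packet at the
	// given byte offset; the half/word variants also convert the value from
	// network to host byte order.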
	h_proto = load_half(skb, offsetof(struct ethhdr, h_proto));
	switch (h_proto) {
	case ETH_P_IP:
		proto = load_byte(skb,
				  ETH_HLEN + offsetof(struct iphdr, protocol));
		// An IPv4 header doesn't have a fixed size. The IHL field of a packet
		// represents the size of the IP header in 32-bit words, so we need to
		// multiply this value by 4 to get the header size in bytes.
		__u8 ihl_byte = load_byte(skb, ETH_HLEN);
		struct iphdr *iph = (struct iphdr *)&ihl_byte;
		__u8 ip_header_len = iph->ihl * 4;
		l4_off = ETH_HLEN + ip_header_len;
		break;

	case ETH_P_IPV6:
		proto = load_byte(skb,
				  ETH_HLEN + offsetof(struct ipv6hdr, nexthdr));
		l4_off = ETH_HLEN + sizeof(struct ipv6hdr);

		// Parse IPv6 extension headers
		// Up to 6 extension headers can be chained. See ipv6_ext_hdr().
#pragma unroll
		for (i = 0; i < 6; i++) {
			__u8 nextproto;

			// TCP or UDP found
			if (proto == NEXTHDR_TCP || proto == NEXTHDR_UDP)
				break;

			nextproto = load_byte(skb, l4_off);

			// Unfortunately, each extension header has a different way to calculate the header length.
			// Support the ones defined in ipv6_ext_hdr(). See ipv6_skip_exthdr().
			switch (proto) {
			case NEXTHDR_FRAGMENT:
				// No hdrlen in the fragment header
				l4_off += 8;
				break;
			case NEXTHDR_AUTH:
				// See ipv6_authlen()
				l4_off += 4 * (load_byte(skb, l4_off + 1) + 2);
				break;
			case NEXTHDR_HOP:
			case NEXTHDR_ROUTING:
			case NEXTHDR_DEST:
				// See ipv6_optlen()
				l4_off += 8 * (load_byte(skb, l4_off + 1) + 1);
				break;
			case NEXTHDR_NONE:
				// Nothing more in the packet. Not even TCP or UDP.
				return 0;
			default:
				// Unknown header
				return 0;
			}
			proto = nextproto;
		}
		break;

	default:
		return 0;
	}

	switch (proto) {
	case IPPROTO_UDP:
		sport = load_half(skb,
				  l4_off + offsetof(struct udphdr, source));
		dport = load_half(skb, l4_off + offsetof(struct udphdr, dest));
		dns_off = l4_off + sizeof(struct udphdr);
		break;
	// TODO: support TCP
	default:
		return 0;
	}

	if (!is_dns_port(sport) && !is_dns_port(dport))
		return 0;

	// Initialize the event only after we know we're interested in this packet
	// to avoid wasting cycles.
	__builtin_memset(&event, 0, sizeof(event));

	event.netns = skb->cb[0]; // cb[0] initialized by dispatcher.bpf.c
	event.timestamp = bpf_ktime_get_boot_ns();
	event.proto = proto;
	event.dns_off = dns_off;
	event.pkt_type = skb->pkt_type;
	event.sport = sport;
	event.dport = dport;

	// The packet is DNS: Do a second pass to extract all the information we need
	switch (h_proto) {
	case ETH_P_IP:
		event.af = AF_INET;
		event.daddr_v4 = load_word(
			skb, ETH_HLEN + offsetof(struct iphdr, daddr));
		event.saddr_v4 = load_word(
			skb, ETH_HLEN + offsetof(struct iphdr, saddr));
		// load_word converts from network to host endianness. Convert back to
		// network endianness because inet_ntop() requires it.
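		// (bpf_htonl() is a no-op on big-endian hosts, so this is correct on
		// either byte order.)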
		event.daddr_v4 = bpf_htonl(event.daddr_v4);
		event.saddr_v4 = bpf_htonl(event.saddr_v4);
		break;
	case ETH_P_IPV6:
		event.af = AF_INET6;
		if (bpf_skb_load_bytes(
			    skb, ETH_HLEN + offsetof(struct ipv6hdr, saddr),
			    &event.saddr_v6, sizeof(event.saddr_v6)))
			return 0;
		if (bpf_skb_load_bytes(
			    skb, ETH_HLEN + offsetof(struct ipv6hdr, daddr),
			    &event.daddr_v6, sizeof(event.daddr_v6)))
			return 0;
		break;
	}

	// Enrich event with process metadata
	struct sockets_value *skb_val = gadget_socket_lookup(skb);
	if (skb_val != NULL) {
		event.mount_ns_id = skb_val->mntns;
		event.pid = skb_val->pid_tgid >> 32;
		event.tid = (__u32)skb_val->pid_tgid;
		__builtin_memcpy(&event.task, skb_val->task,
				 sizeof(event.task));
		event.uid = (__u32)skb_val->uid_gid;
		event.gid = (__u32)(skb_val->uid_gid >> 32);
	}

	// Calculate latency:
	//
	// Track the latency from when a query is sent from a container
	// to when a response to the query is received by that same container.
	//
	// * On DNS query sent from a container namespace (qr == DNS_QR_QUERY and
	//   pkt_type == PACKET_OUTGOING), store the query timestamp in a map.
	//
	// * On DNS response received in the same container namespace (qr == DNS_QR_RESP
	//   and pkt_type == PACKET_HOST), retrieve/delete the query timestamp and set
	//   the latency field on the event.
	//
	// A garbage collection thread running in userspace periodically scans for keys
	// with old timestamps to free space occupied by queries that never receive a
	// response.
	//
	// Skip this if skb_val == NULL (gadget_socket_lookup could not provide the
	// pid_tgid we use in the query key) or if event.timestamp == 0 (kernels
	// before 5.8 don't support bpf_ktime_get_boot_ns, and the patched version
	// IG injects on those kernels always returns zero).
	if (skb_val != NULL && event.timestamp > 0) {
		union dnsflags flags;
		flags.flags = load_half(skb, dns_off + offsetof(struct dnshdr,
								flags));
		id = load_half(skb, dns_off + offsetof(struct dnshdr, id));
		__u8 qr = flags.qr;

		struct query_key_t query_key = {
			.pid_tgid = skb_val->pid_tgid,
			.id = id,
		};
		if (qr == DNS_QR_QUERY && event.pkt_type == PACKET_OUTGOING) {
			bpf_map_update_elem(&query_map, &query_key,
					    &event.timestamp, BPF_NOEXIST);
		} else if (flags.qr == DNS_QR_RESP &&
			   event.pkt_type == PACKET_HOST) {
			__u64 *query_ts =
				bpf_map_lookup_elem(&query_map, &query_key);
			if (query_ts != NULL) {
				// The query timestamp should always be less than the event
				// timestamp, but check anyway to be safe.
				if (*query_ts < event.timestamp) {
					event.latency_ns =
						event.timestamp - *query_ts;
				}
				bpf_map_delete_elem(&query_map, &query_key);
			}
		}
	}

	__u64 skb_len = skb->len;
	// The packet length in the upper 32 bits of the flags tells the helper to
	// append that many bytes of the packet to the perf sample
	// (BPF_F_CTXLEN_MASK), so userspace receives the packet contents along
	// with the event.
	bpf_perf_event_output(skb, &events, skb_len << 32 | BPF_F_CURRENT_CPU,
			      &event, sizeof(event));

	return 0;
}

char _license[] SEC("license") = "GPL";