github.com/cilium/cilium@v1.16.2/bpf/include/bpf/builtins.h

/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */

#pragma once

#include "compiler.h"

#ifndef lock_xadd
# define lock_xadd(P, V) ((void) __sync_fetch_and_add((P), (V)))
#endif

/* Unfortunately the verifier forces aligned stack access while other memory
 * (map, pkt, etc) does not have to be aligned. Mark objects > 8 bytes on the
 * /stack/ in order to force-align such memcpy candidates when we really need
 * them to be aligned. This is not needed for objects of size <= 8 bytes, and
 * for objects > 8 bytes it is /only/ needed when 8 bytes is not the natural
 * object alignment (e.g. __u8 foo[12]).
 */
#define __align_stack_8 __aligned(8)

/* Memory iterators used below. */
#define __it_bwd(x, op) (x -= sizeof(__u##op))
#define __it_fwd(x, op) (x += sizeof(__u##op))

/* Memory operators used below. */
#define __it_set(a, op) (*(__u##op *)__it_bwd(a, op)) = 0
#define __it_xor(a, b, r, op) r |= (*(__u##op *)__it_bwd(a, op)) ^ (*(__u##op *)__it_bwd(b, op))
#define __it_mob(a, b, op) (*(__u##op *)__it_bwd(a, op)) = (*(__u##op *)__it_bwd(b, op))
#define __it_mof(a, b, op)				\
	do {						\
		*(__u##op *)a = *(__u##op *)b;		\
		__it_fwd(a, op); __it_fwd(b, op);	\
	} while (0)

static __always_inline __maybe_unused void
__bpf_memset_builtin(void *d, __u8 c, __u64 len)
{
	/* Everything with a non-zero or non-constant (currently unsupported)
	 * c gets handled here.
	 */
	__builtin_memset(d, c, len);
}

static __always_inline void __bpf_memzero(void *d, __u64 len)
{
#if __clang_major__ >= 10
	if (!__builtin_constant_p(len))
		__throw_build_bug();

	d += len;

	if (len > 1 && len % 2 == 1) {
		__it_set(d, 8);
		len -= 1;
	}

	switch (len) {
	case 96:         __it_set(d, 64); fallthrough;
	case 88: jmp_88: __it_set(d, 64); fallthrough;
	case 80: jmp_80: __it_set(d, 64); fallthrough;
	case 72: jmp_72: __it_set(d, 64); fallthrough;
	case 64: jmp_64: __it_set(d, 64); fallthrough;
	case 56: jmp_56: __it_set(d, 64); fallthrough;
	case 48: jmp_48: __it_set(d, 64); fallthrough;
	case 40: jmp_40: __it_set(d, 64); fallthrough;
	case 32: jmp_32: __it_set(d, 64); fallthrough;
	case 24: jmp_24: __it_set(d, 64); fallthrough;
	case 16: jmp_16: __it_set(d, 64); fallthrough;
	case  8: jmp_8:  __it_set(d, 64);
		break;

	case 94: __it_set(d, 16); __it_set(d, 32); goto jmp_88;
	case 86: __it_set(d, 16); __it_set(d, 32); goto jmp_80;
	case 78: __it_set(d, 16); __it_set(d, 32); goto jmp_72;
	case 70: __it_set(d, 16); __it_set(d, 32); goto jmp_64;
	case 62: __it_set(d, 16); __it_set(d, 32); goto jmp_56;
	case 54: __it_set(d, 16); __it_set(d, 32); goto jmp_48;
	case 46: __it_set(d, 16); __it_set(d, 32); goto jmp_40;
	case 38: __it_set(d, 16); __it_set(d, 32); goto jmp_32;
	case 30: __it_set(d, 16); __it_set(d, 32); goto jmp_24;
	case 22: __it_set(d, 16); __it_set(d, 32); goto jmp_16;
	case 14: __it_set(d, 16); __it_set(d, 32); goto jmp_8;
	case  6: __it_set(d, 16); __it_set(d, 32);
		break;

	case 92: __it_set(d, 32); goto jmp_88;
	case 84: __it_set(d, 32); goto jmp_80;
	case 76: __it_set(d, 32); goto jmp_72;
	case 68: __it_set(d, 32); goto jmp_64;
	case 60: __it_set(d, 32); goto jmp_56;
	case 52: __it_set(d, 32); goto jmp_48;
	case 44: __it_set(d, 32); goto jmp_40;
	case 36: __it_set(d, 32); goto jmp_32;
	case 28: __it_set(d, 32); goto jmp_24;
	case 20: __it_set(d, 32); goto jmp_16;
	case 12: __it_set(d, 32); goto jmp_8;
	case  4: __it_set(d, 32);
		break;

	case 90: __it_set(d, 16); goto jmp_88;
	case 82: __it_set(d, 16); goto jmp_80;
	case 74: __it_set(d, 16); goto jmp_72;
	case 66: __it_set(d, 16); goto jmp_64;
	case 58: __it_set(d, 16); goto jmp_56;
	case 50: __it_set(d, 16); goto jmp_48;
	case 42: __it_set(d, 16); goto jmp_40;
	case 34: __it_set(d, 16); goto jmp_32;
	case 26: __it_set(d, 16); goto jmp_24;
	case 18: __it_set(d, 16); goto jmp_16;
	case 10: __it_set(d, 16); goto jmp_8;
	case  2: __it_set(d, 16);
		break;

	case  1: __it_set(d, 8);
		break;

	default:
		/* __builtin_memset() is crappy slow since it cannot
		 * make any assumptions about alignment & underlying
		 * efficient unaligned access on the target we're
		 * running on.
		 */
		__throw_build_bug();
	}
#else
	__bpf_memset_builtin(d, 0, len);
#endif
}

static __always_inline __maybe_unused void
__bpf_no_builtin_memset(void *d __maybe_unused, __u8 c __maybe_unused,
			__u64 len __maybe_unused)
{
	__throw_build_bug();
}

/* Redirect any direct use in our code to throw an error. */
#define __builtin_memset __bpf_no_builtin_memset

static __always_inline __nobuiltin("memset") void memset(void *d, int c,
							 __u64 len)
{
	if (__builtin_constant_p(len) && __builtin_constant_p(c) && c == 0)
		__bpf_memzero(d, len);
	else
		__bpf_memset_builtin(d, (__u8)c, len);
}
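
/* Editorial sketch, not part of the original header: illustrative use of the
 * memset() wrapper above. The length must be a compile-time constant, and
 * only c == 0 takes the unrolled __bpf_memzero() path; anything else falls
 * back to __bpf_memset_builtin(). The struct below is hypothetical:
 *
 *	struct foo {
 *		__u32 a, b, c;		// 12 bytes total
 *	} f __align_stack_8;		// force 8-byte stack alignment
 *
 *	memset(&f, 0, sizeof(f));	// constant len, c == 0: unrolled stores
 *	memset(&f, 0xff, sizeof(f));	// non-zero c: handled by the builtin
 */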

static __always_inline __maybe_unused void
__bpf_memcpy_builtin(void *d, const void *s, __u64 len)
{
	/* Explicit opt-in for __builtin_memcpy(). */
	__builtin_memcpy(d, s, len);
}

static __always_inline void __bpf_memcpy(void *d, const void *s, __u64 len)
{
#if __clang_major__ >= 10
	if (!__builtin_constant_p(len))
		__throw_build_bug();

	d += len;
	s += len;

	if (len > 1 && len % 2 == 1) {
		__it_mob(d, s, 8);
		len -= 1;
	}

	switch (len) {
	case 96:         __it_mob(d, s, 64); fallthrough;
	case 88: jmp_88: __it_mob(d, s, 64); fallthrough;
	case 80: jmp_80: __it_mob(d, s, 64); fallthrough;
	case 72: jmp_72: __it_mob(d, s, 64); fallthrough;
	case 64: jmp_64: __it_mob(d, s, 64); fallthrough;
	case 56: jmp_56: __it_mob(d, s, 64); fallthrough;
	case 48: jmp_48: __it_mob(d, s, 64); fallthrough;
	case 40: jmp_40: __it_mob(d, s, 64); fallthrough;
	case 32: jmp_32: __it_mob(d, s, 64); fallthrough;
	case 24: jmp_24: __it_mob(d, s, 64); fallthrough;
	case 16: jmp_16: __it_mob(d, s, 64); fallthrough;
	case  8: jmp_8:  __it_mob(d, s, 64);
		break;

	case 94: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_88;
	case 86: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_80;
	case 78: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_72;
	case 70: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_64;
	case 62: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_56;
	case 54: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_48;
	case 46: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_40;
	case 38: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_32;
	case 30: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_24;
	case 22: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_16;
	case 14: __it_mob(d, s, 16); __it_mob(d, s, 32); goto jmp_8;
	case  6: __it_mob(d, s, 16); __it_mob(d, s, 32);
		break;

	case 92: __it_mob(d, s, 32); goto jmp_88;
	case 84: __it_mob(d, s, 32); goto jmp_80;
	case 76: __it_mob(d, s, 32); goto jmp_72;
	case 68: __it_mob(d, s, 32); goto jmp_64;
	case 60: __it_mob(d, s, 32); goto jmp_56;
	case 52: __it_mob(d, s, 32); goto jmp_48;
	case 44: __it_mob(d, s, 32); goto jmp_40;
	case 36: __it_mob(d, s, 32); goto jmp_32;
	case 28: __it_mob(d, s, 32); goto jmp_24;
	case 20: __it_mob(d, s, 32); goto jmp_16;
	case 12: __it_mob(d, s, 32); goto jmp_8;
	case  4: __it_mob(d, s, 32);
		break;

	case 90: __it_mob(d, s, 16); goto jmp_88;
	case 82: __it_mob(d, s, 16); goto jmp_80;
	case 74: __it_mob(d, s, 16); goto jmp_72;
	case 66: __it_mob(d, s, 16); goto jmp_64;
	case 58: __it_mob(d, s, 16); goto jmp_56;
	case 50: __it_mob(d, s, 16); goto jmp_48;
	case 42: __it_mob(d, s, 16); goto jmp_40;
	case 34: __it_mob(d, s, 16); goto jmp_32;
	case 26: __it_mob(d, s, 16); goto jmp_24;
	case 18: __it_mob(d, s, 16); goto jmp_16;
	case 10: __it_mob(d, s, 16); goto jmp_8;
	case  2: __it_mob(d, s, 16);
		break;

	case  1: __it_mob(d, s, 8);
		break;

	default:
		/* __builtin_memcpy() is crappy slow since it cannot
		 * make any assumptions about alignment & underlying
		 * efficient unaligned access on the target we're
		 * running on.
		 */
		__throw_build_bug();
	}
#else
	__bpf_memcpy_builtin(d, s, len);
#endif
}

static __always_inline __maybe_unused void
__bpf_no_builtin_memcpy(void *d __maybe_unused, const void *s __maybe_unused,
			__u64 len __maybe_unused)
{
	__throw_build_bug();
}

/* Redirect any direct use in our code to throw an error. */
#define __builtin_memcpy __bpf_no_builtin_memcpy

static __always_inline __nobuiltin("memcpy") void memcpy(void *d, const void *s,
							 __u64 len)
{
	return __bpf_memcpy(d, s, len);
}
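
/* Editorial sketch, not part of the original header: illustrative use of the
 * memcpy() wrapper above. Because sizeof() is a compile-time constant, the
 * copy is unrolled into 8/4/2/1-byte moves walking backwards; a runtime
 * length would hit __throw_build_bug(). The key type is hypothetical:
 *
 *	struct lb_key key __align_stack_8;	// hypothetical fixed-size key
 *
 *	memcpy(&key, data, sizeof(key));	// constant len: unrolled moves
 */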

static __always_inline __maybe_unused __u64
__bpf_memcmp_builtin(const void *x, const void *y, __u64 len)
{
	/* Explicit opt-in for __builtin_memcmp(). We use the bcmp builtin
	 * here for two reasons: i) we only need to know equal or non-equal,
	 * similarly to __bpf_memcmp(), and ii) if __bpf_memcmp() ends up
	 * selecting __bpf_memcmp_builtin(), clang generates a memcmp loop.
	 * That is, (*) -> __bpf_memcmp() -> __bpf_memcmp_builtin() ->
	 * __builtin_memcmp() -> memcmp() -> (*), meaning it will end up
	 * selecting our memcmp() from here. Remapping to __builtin_bcmp()
	 * breaks this loop and resolves both needs at once.
	 */
	return __builtin_bcmp(x, y, len);
}

static __always_inline __u64 __bpf_memcmp(const void *x, const void *y,
					  __u64 len)
{
#if __clang_major__ >= 10
	__u64 r = 0;

	if (!__builtin_constant_p(len))
		__throw_build_bug();

	x += len;
	y += len;

	if (len > 1 && len % 2 == 1) {
		__it_xor(x, y, r, 8);
		len -= 1;
	}

	switch (len) {
	case 72:         __it_xor(x, y, r, 64); fallthrough;
	case 64: jmp_64: __it_xor(x, y, r, 64); fallthrough;
	case 56: jmp_56: __it_xor(x, y, r, 64); fallthrough;
	case 48: jmp_48: __it_xor(x, y, r, 64); fallthrough;
	case 40: jmp_40: __it_xor(x, y, r, 64); fallthrough;
	case 32: jmp_32: __it_xor(x, y, r, 64); fallthrough;
	case 24: jmp_24: __it_xor(x, y, r, 64); fallthrough;
	case 16: jmp_16: __it_xor(x, y, r, 64); fallthrough;
	case  8: jmp_8:  __it_xor(x, y, r, 64);
		break;

	case 70: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_64;
	case 62: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_56;
	case 54: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_48;
	case 46: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_40;
	case 38: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_32;
	case 30: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_24;
	case 22: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_16;
	case 14: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32); goto jmp_8;
	case  6: __it_xor(x, y, r, 16); __it_xor(x, y, r, 32);
		break;

	case 68: __it_xor(x, y, r, 32); goto jmp_64;
	case 60: __it_xor(x, y, r, 32); goto jmp_56;
	case 52: __it_xor(x, y, r, 32); goto jmp_48;
	case 44: __it_xor(x, y, r, 32); goto jmp_40;
	case 36: __it_xor(x, y, r, 32); goto jmp_32;
	case 28: __it_xor(x, y, r, 32); goto jmp_24;
	case 20: __it_xor(x, y, r, 32); goto jmp_16;
	case 12: __it_xor(x, y, r, 32); goto jmp_8;
	case  4: __it_xor(x, y, r, 32);
		break;

	case 66: __it_xor(x, y, r, 16); goto jmp_64;
	case 58: __it_xor(x, y, r, 16); goto jmp_56;
	case 50: __it_xor(x, y, r, 16); goto jmp_48;
	case 42: __it_xor(x, y, r, 16); goto jmp_40;
	case 34: __it_xor(x, y, r, 16); goto jmp_32;
	case 26: __it_xor(x, y, r, 16); goto jmp_24;
	case 18: __it_xor(x, y, r, 16); goto jmp_16;
	case 10: __it_xor(x, y, r, 16); goto jmp_8;
	case  2: __it_xor(x, y, r, 16);
		break;

	case  1: __it_xor(x, y, r, 8);
		break;

	default:
		__throw_build_bug();
	}

	return r;
#else
	return __bpf_memcmp_builtin(x, y, len);
#endif
}

static __always_inline __maybe_unused __u64
__bpf_no_builtin_memcmp(const void *x __maybe_unused,
			const void *y __maybe_unused, __u64 len __maybe_unused)
{
	__throw_build_bug();
	return 0;
}

/* Redirect any direct use in our code to throw an error. */
#define __builtin_memcmp __bpf_no_builtin_memcmp

/* Modified for our needs in that we only return either zero (x and y
 * are equal) or non-zero (x and y are non-equal).
 */
static __always_inline __nobuiltin("memcmp") __u64 memcmp(const void *x,
							  const void *y,
							  __u64 len)
{
	return __bpf_memcmp(x, y, len);
}
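
/* Editorial sketch, not part of the original header: unlike libc memcmp(),
 * the wrapper above only distinguishes equal from non-equal, so callers
 * should test the result against zero rather than its sign. Hypothetical
 * example comparing two 16-byte addresses:
 *
 *	union v6addr a, b;			// illustrative 16-byte values
 *
 *	if (!memcmp(&a, &b, sizeof(a)))		// 0 means equal
 *		handle_equal();			// hypothetical helper
 */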

static __always_inline __maybe_unused void
__bpf_memmove_builtin(void *d, const void *s, __u64 len)
{
	/* Explicit opt-in for __builtin_memmove(). */
	__builtin_memmove(d, s, len);
}

static __always_inline void __bpf_memmove_bwd(void *d, const void *s, __u64 len)
{
	/* Our internal memcpy implementation walks backwards by default. */
	__bpf_memcpy(d, s, len);
}
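
/* Editorial note, not part of the original header: the backward walk is what
 * makes __bpf_memmove_bwd() safe for overlapping regions with d > s. Walking
 * from the tail of the buffers toward the start means each source chunk is
 * read before the writes to lower destination offsets can clobber it. E.g.
 * for d = s + 4 and len = 8, the tail bytes s[4..7] are read (into d[4..7])
 * before the writes to d[0..3], which alias s[4..7], overwrite them.
 */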

static __always_inline void __bpf_memmove_fwd(void *d, const void *s, __u64 len)
{
#if __clang_major__ >= 10
	if (!__builtin_constant_p(len))
		__throw_build_bug();

	switch (len) {
	case 96:         __it_mof(d, s, 64); fallthrough;
	case 88: jmp_88: __it_mof(d, s, 64); fallthrough;
	case 80: jmp_80: __it_mof(d, s, 64); fallthrough;
	case 72: jmp_72: __it_mof(d, s, 64); fallthrough;
	case 64: jmp_64: __it_mof(d, s, 64); fallthrough;
	case 56: jmp_56: __it_mof(d, s, 64); fallthrough;
	case 48: jmp_48: __it_mof(d, s, 64); fallthrough;
	case 40: jmp_40: __it_mof(d, s, 64); fallthrough;
	case 32: jmp_32: __it_mof(d, s, 64); fallthrough;
	case 24: jmp_24: __it_mof(d, s, 64); fallthrough;
	case 16: jmp_16: __it_mof(d, s, 64); fallthrough;
	case  8: jmp_8:  __it_mof(d, s, 64);
		break;

	case 94: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_88;
	case 86: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_80;
	case 78: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_72;
	case 70: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_64;
	case 62: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_56;
	case 54: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_48;
	case 46: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_40;
	case 38: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_32;
	case 30: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_24;
	case 22: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_16;
	case 14: __it_mof(d, s, 16); __it_mof(d, s, 32); goto jmp_8;
	case  6: __it_mof(d, s, 16); __it_mof(d, s, 32);
		break;

	case 92: __it_mof(d, s, 32); goto jmp_88;
	case 84: __it_mof(d, s, 32); goto jmp_80;
	case 76: __it_mof(d, s, 32); goto jmp_72;
	case 68: __it_mof(d, s, 32); goto jmp_64;
	case 60: __it_mof(d, s, 32); goto jmp_56;
	case 52: __it_mof(d, s, 32); goto jmp_48;
	case 44: __it_mof(d, s, 32); goto jmp_40;
	case 36: __it_mof(d, s, 32); goto jmp_32;
	case 28: __it_mof(d, s, 32); goto jmp_24;
	case 20: __it_mof(d, s, 32); goto jmp_16;
	case 12: __it_mof(d, s, 32); goto jmp_8;
	case  4: __it_mof(d, s, 32);
		break;

	case 90: __it_mof(d, s, 16); goto jmp_88;
	case 82: __it_mof(d, s, 16); goto jmp_80;
	case 74: __it_mof(d, s, 16); goto jmp_72;
	case 66: __it_mof(d, s, 16); goto jmp_64;
	case 58: __it_mof(d, s, 16); goto jmp_56;
	case 50: __it_mof(d, s, 16); goto jmp_48;
	case 42: __it_mof(d, s, 16); goto jmp_40;
	case 34: __it_mof(d, s, 16); goto jmp_32;
	case 26: __it_mof(d, s, 16); goto jmp_24;
	case 18: __it_mof(d, s, 16); goto jmp_16;
	case 10: __it_mof(d, s, 16); goto jmp_8;
	case  2: __it_mof(d, s, 16);
		break;

	case  1: __it_mof(d, s, 8);
		break;

	default:
		/* __builtin_memmove() is crappy slow since it cannot
		 * make any assumptions about alignment & underlying
		 * efficient unaligned access on the target we're
		 * running on.
		 */
		__throw_build_bug();
	}
#else
	__bpf_memmove_builtin(d, s, len);
#endif
}

static __always_inline __maybe_unused void
__bpf_no_builtin_memmove(void *d __maybe_unused, const void *s __maybe_unused,
			 __u64 len __maybe_unused)
{
	__throw_build_bug();
}

/* Redirect any direct use in our code to throw an error. */
#define __builtin_memmove __bpf_no_builtin_memmove

static __always_inline void __bpf_memmove(void *d, const void *s, __u64 len)
{
	/* Note that the forward-walking memmove() might not work with on-stack
	 * data, since we'll end up walking the memory unaligned even when
	 * __align_stack_8 is set. This should not matter much since we'll use
	 * memmove() mostly or only on pkt data.
	 *
	 * Example with d, s, len = 12 bytes:
	 *  * __bpf_memmove_fwd() emits: mov_32 d[0],s[0]; mov_64 d[4],s[4]
	 *  * __bpf_memmove_bwd() emits: mov_32 d[8],s[8]; mov_64 d[0],s[0]
	 */
	if (d <= s)
		return __bpf_memmove_fwd(d, s, len);
	else
		return __bpf_memmove_bwd(d, s, len);
}

static __always_inline __nobuiltin("memmove") void memmove(void *d,
							    const void *s,
							    __u64 len)
{
	return __bpf_memmove(d, s, len);
}
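
/* Editorial sketch, not part of the original header: illustrative use of the
 * memmove() wrapper above on overlapping packet bytes with a constant length,
 * e.g. shifting data in place around a hypothetical 4-byte header. The
 * wrapper picks the forward or backward walker from the pointer order:
 *
 *	void *data = (void *)(long)ctx->data;	// after bounds checks
 *
 *	memmove(data + 4, data, 16);		// d > s: backward walk
 *	memmove(data, data + 4, 16);		// d <= s: forward walk
 */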