github.com/rohankumardubey/cilium@v1.6.12/bpf/cilium-map-migrate.c (about) 1 /* 2 * Copyright (C) 2017 Authors of Cilium 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 * 18 * Parts from iproute2 bpf.c loader code: 19 * 20 * This program is free software; you can distribute it and/or 21 * modify it under the terms of the GNU General Public License 22 * as published by the Free Software Foundation; either version 23 * 2 of the License, or (at your option) any later version. 24 * 25 * Authors: 26 * 27 * Daniel Borkmann <daniel@iogearbox.net> 28 * Jiri Pirko <jiri@resnulli.us> 29 * Alexei Starovoitov <ast@kernel.org> 30 */ 31 32 #include <stdio.h> 33 #include <syslog.h> 34 #include <stdlib.h> 35 #include <unistd.h> 36 #include <stdbool.h> 37 #include <errno.h> 38 #include <fcntl.h> 39 #include <string.h> 40 #include <limits.h> 41 42 #include <sys/syscall.h> 43 #include <sys/stat.h> 44 45 #include <arpa/inet.h> 46 47 #include <linux/bpf.h> 48 49 #include "elf/libelf.h" 50 #include "elf/gelf.h" 51 52 #include "iproute2/bpf_elf.h" 53 54 #ifndef EM_BPF 55 # define EM_BPF 247 56 #endif 57 58 #define ELF_MAX_MAPS 64 59 60 #define STATE_PENDING "pending" 61 62 #define BPF_ENV_MNT "CILIUM_BPF_MNT" 63 64 struct bpf_elf_sec_data { 65 GElf_Shdr sec_hdr; 66 Elf_Data *sec_data; 67 const char *sec_name; 68 }; 69 70 struct bpf_elf_ctx { 71 GElf_Ehdr elf_hdr; 72 Elf *elf_fd; 73 Elf_Data *sym_tab; 74 Elf_Data *str_tab; 75 Elf_Data *map_tab; 76 int map_len; 77 int map_num; 78 int map_sec; 79 int sym_num; 80 int obj_fd; 81 }; 82 83 static int bpf(int cmd, union bpf_attr *attr, unsigned int size) 84 { 85 #ifndef __NR_bpf 86 # if defined(__i386__) 87 # define __NR_bpf 357 88 # elif defined(__x86_64__) 89 # define __NR_bpf 321 90 # elif defined(__aarch64__) 91 # define __NR_bpf 280 92 # else 93 # error __NR_bpf not defined. 94 # endif 95 #endif 96 return syscall(__NR_bpf, cmd, attr, size); 97 } 98 99 static int renameat2(int dfd1, const char *path1, 100 int dfd2, const char *path2, 101 unsigned int flags) 102 { 103 #ifndef __NR_renameat2 104 # if defined(__i386__) 105 # define __NR_renameat2 353 106 # elif defined(__x86_64__) 107 # define __NR_renameat2 316 108 # elif defined(__aarch64__) 109 # define __NR_renameat2 276 110 # else 111 # error __NR_renameat2 not defined. 112 # endif 113 #endif 114 return syscall(__NR_renameat2, dfd1, path1, dfd2, path2, flags); 115 } 116 117 static inline __u64 bpf_ptr_to_u64(const void *ptr) 118 { 119 return (__u64)(unsigned long)ptr; 120 } 121 122 static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx) 123 { 124 if (ctx->elf_hdr.e_type != ET_REL || 125 (ctx->elf_hdr.e_machine != EM_NONE && 126 ctx->elf_hdr.e_machine != EM_BPF) || 127 ctx->elf_hdr.e_version != EV_CURRENT) { 128 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n"); 129 return -EINVAL; 130 } 131 132 switch (ctx->elf_hdr.e_ident[EI_DATA]) { 133 default: 134 fprintf(stderr, "ELF format error, wrong endianness info?\n"); 135 return -EINVAL; 136 case ELFDATA2LSB: 137 if (htons(1) == 1) { 138 fprintf(stderr, 139 "We are big endian, eBPF object is little endian!\n"); 140 return -EIO; 141 } 142 break; 143 case ELFDATA2MSB: 144 if (htons(1) != 1) { 145 fprintf(stderr, 146 "We are little endian, eBPF object is big endian!\n"); 147 return -EIO; 148 } 149 break; 150 } 151 152 return 0; 153 } 154 155 static int bpf_elf_init(struct bpf_elf_ctx *ctx, const char *pathname) 156 { 157 int ret; 158 159 if (elf_version(EV_CURRENT) == EV_NONE) 160 return -EIO; 161 ctx->obj_fd = open(pathname, O_RDONLY); 162 if (ctx->obj_fd < 0) 163 return ctx->obj_fd; 164 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL); 165 if (!ctx->elf_fd) { 166 ret = -EINVAL; 167 goto out_fd; 168 } 169 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) { 170 ret = -EINVAL; 171 goto out_fd; 172 } 173 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) != 174 &ctx->elf_hdr) { 175 ret = -EIO; 176 goto out_elf; 177 } 178 ret = bpf_elf_check_ehdr(ctx); 179 if (ret < 0) 180 goto out_elf; 181 return 0; 182 out_elf: 183 elf_end(ctx->elf_fd); 184 out_fd: 185 close(ctx->obj_fd); 186 return ret; 187 } 188 189 static void bpf_elf_close(struct bpf_elf_ctx *ctx) 190 { 191 elf_end(ctx->elf_fd); 192 close(ctx->obj_fd); 193 } 194 195 static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, 196 const GElf_Sym *sym) 197 { 198 return ctx->str_tab->d_buf + sym->st_name; 199 } 200 201 static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end) 202 { 203 GElf_Sym sym; 204 int off, i; 205 206 for (off = 0; off < end; off += ctx->map_len) { 207 /* Order doesn't need to be linear here, hence we walk 208 * the table again. 209 */ 210 for (i = 0; i < ctx->sym_num; i++) { 211 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) 212 continue; 213 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || 214 !(GELF_ST_TYPE(sym.st_info) == STT_NOTYPE || 215 GELF_ST_TYPE(sym.st_info) == STT_OBJECT) || 216 sym.st_shndx != ctx->map_sec) 217 continue; 218 if (sym.st_value == off) 219 break; 220 if (i == ctx->sym_num - 1) 221 return -1; 222 } 223 } 224 225 return off == end ? 0 : -1; 226 } 227 228 static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, unsigned long off) 229 { 230 GElf_Sym sym; 231 int i; 232 233 for (i = 0; i < ctx->sym_num; i++) { 234 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) 235 continue; 236 237 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || 238 !(GELF_ST_TYPE(sym.st_info) == STT_NOTYPE || 239 GELF_ST_TYPE(sym.st_info) == STT_OBJECT) || 240 sym.st_shndx != ctx->map_sec || 241 sym.st_value != off) 242 continue; 243 return bpf_str_tab_name(ctx, &sym); 244 } 245 246 return NULL; 247 } 248 249 static int bpf_map_num_sym(struct bpf_elf_ctx *ctx) 250 { 251 int i, num = 0; 252 GElf_Sym sym; 253 254 for (i = 0; i < ctx->sym_num; i++) { 255 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) 256 continue; 257 258 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || 259 !(GELF_ST_TYPE(sym.st_info) == STT_NOTYPE || 260 GELF_ST_TYPE(sym.st_info) == STT_OBJECT) || 261 sym.st_shndx != ctx->map_sec) 262 continue; 263 num++; 264 } 265 266 return num; 267 } 268 269 static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map) 270 { 271 char file[PATH_MAX], buff[256]; 272 unsigned int val; 273 FILE *fp; 274 275 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 276 memset(map, 0, sizeof(*map)); 277 278 fp = fopen(file, "r"); 279 if (!fp) { 280 fprintf(stderr, "No procfs support?!\n"); 281 return -EIO; 282 } 283 284 while (fgets(buff, sizeof(buff), fp)) { 285 if (sscanf(buff, "map_type:\t%u", &val) == 1) 286 map->type = val; 287 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 288 map->size_key = val; 289 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 290 map->size_value = val; 291 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 292 map->max_elem = val; 293 else if (sscanf(buff, "map_flags:\t%i", &val) == 1) 294 map->flags = val; 295 } 296 297 fclose(fp); 298 return 0; 299 } 300 301 static int bpf_obj_get(const char *pathname) 302 { 303 union bpf_attr attr = {}; 304 305 attr.pathname = bpf_ptr_to_u64(pathname); 306 return bpf(BPF_OBJ_GET, &attr, sizeof(attr)); 307 } 308 309 typedef int (*bpf_handle_state_t)(struct bpf_elf_ctx *ctx, 310 const struct bpf_elf_map *map, 311 const char *name, int exit); 312 313 char fs_base[PATH_MAX + 1]; 314 315 void fs_base_init() 316 { 317 const char *mnt_env = getenv(BPF_ENV_MNT); 318 319 if (mnt_env) { 320 snprintf(fs_base, sizeof(fs_base), "%s/tc/globals", mnt_env); 321 } else { 322 strcpy(fs_base, "/sys/fs/bpf/tc/globals"); 323 } 324 } 325 326 static int bpf_handle_pending(struct bpf_elf_ctx *ctx, 327 const struct bpf_elf_map *map, 328 const char *name, int exit) 329 { 330 char file[PATH_MAX + 1], dest[PATH_MAX + 1]; 331 struct bpf_elf_map pinned; 332 struct stat sb; 333 int fd, ret; 334 335 snprintf(file, sizeof(file), "%s/%s", fs_base, name); 336 ret = stat(file, &sb); 337 if (ret < 0) { 338 if (errno == ENOENT) 339 return 0; 340 fprintf(stderr, "Cannot stat node %s!\n", file); 341 return -errno; 342 } 343 344 fd = bpf_obj_get(file); 345 if (fd < 0) { 346 fprintf(stderr, "Cannot open pinned node %s!\n", file); 347 return -errno; 348 } 349 ret = bpf_derive_elf_map_from_fdinfo(fd, &pinned); 350 close(fd); 351 if (ret < 0) { 352 fprintf(stderr, "Cannot fetch fdinfo from %s!\n", file); 353 return ret; 354 } 355 356 pinned.id = map->id; 357 pinned.pinning = map->pinning; 358 if (!memcmp(map, &pinned, sizeof(pinned))) 359 return 0; 360 361 snprintf(dest, sizeof(dest), "%s:%s", file, STATE_PENDING); 362 syslog(LOG_WARNING, "Property mismatch in %s, migrating node to %s!\n", 363 file, dest); 364 utimensat(AT_FDCWD, file, NULL, 0); 365 return rename(file, dest); 366 } 367 368 static int bpf_handle_finalize(struct bpf_elf_ctx *ctx, 369 const struct bpf_elf_map *map, 370 const char *name, int exit) 371 { 372 char file[PATH_MAX + 1], dest[PATH_MAX + 1]; 373 struct stat sb; 374 int ret; 375 376 snprintf(file, sizeof(file), "%s/%s:%s", fs_base, name, 377 STATE_PENDING); 378 ret = stat(file, &sb); 379 if (ret < 0) { 380 if (errno == ENOENT) 381 return 0; 382 fprintf(stderr, "Cannot stat node %s!\n", file); 383 return -errno; 384 } 385 386 if (exit) { 387 snprintf(dest, sizeof(dest), "%s/%s", fs_base, name); 388 syslog(LOG_WARNING, "Restoring migrated node %s into %s due to bad exit.\n", 389 file, dest); 390 utimensat(AT_FDCWD, file, NULL, 0); 391 renameat2(AT_FDCWD, file, AT_FDCWD, dest, 1); 392 return 0; 393 } else { 394 syslog(LOG_WARNING, "Unlinking migrated node %s due to good exit.\n", 395 file); 396 return unlink(file); 397 } 398 } 399 400 static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section, 401 struct bpf_elf_sec_data *data) 402 { 403 Elf_Data *sec_edata; 404 GElf_Shdr sec_hdr; 405 Elf_Scn *sec_fd; 406 char *sec_name; 407 408 memset(data, 0, sizeof(*data)); 409 410 sec_fd = elf_getscn(ctx->elf_fd, section); 411 if (!sec_fd) 412 return -EINVAL; 413 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) 414 return -EIO; 415 416 sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx, 417 sec_hdr.sh_name); 418 if (!sec_name || !sec_hdr.sh_size) 419 return -ENOENT; 420 421 sec_edata = elf_getdata(sec_fd, NULL); 422 if (!sec_edata || elf_getdata(sec_fd, sec_edata)) 423 return -EIO; 424 425 memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); 426 427 data->sec_name = sec_name; 428 data->sec_data = sec_edata; 429 return 0; 430 } 431 432 static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section, 433 struct bpf_elf_sec_data *data) 434 { 435 ctx->sym_tab = data->sec_data; 436 ctx->sym_num = data->sec_hdr.sh_size / 437 data->sec_hdr.sh_entsize; 438 return 0; 439 } 440 441 static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, 442 struct bpf_elf_sec_data *data) 443 { 444 ctx->str_tab = data->sec_data; 445 return 0; 446 } 447 448 static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section, 449 struct bpf_elf_sec_data *data) 450 { 451 ctx->map_tab = data->sec_data; 452 ctx->map_len = data->sec_data->d_size; 453 ctx->map_sec = section; 454 return 0; 455 } 456 457 static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx, bpf_handle_state_t cb, 458 int exit) 459 { 460 int i, ret = 0, sym_num = bpf_map_num_sym(ctx); 461 struct bpf_elf_map *map; 462 unsigned long off; 463 const char *name; 464 465 if (sym_num == 0 || sym_num > 64) { 466 fprintf(stderr, "%u maps not supported in current map section!\n", 467 sym_num); 468 return -EINVAL; 469 } 470 471 if (ctx->map_len != sym_num * sizeof(struct bpf_elf_map)) { 472 fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n"); 473 return -EINVAL; 474 } 475 476 ctx->map_len /= sym_num; 477 if (bpf_map_verify_all_offs(ctx, ctx->map_num)) { 478 fprintf(stderr, "Different struct bpf_elf_map in use!\n"); 479 return -EINVAL; 480 } 481 482 ctx->map_num = sym_num; 483 for (i = 0, map = ctx->map_tab->d_buf; i < sym_num; i++, map++) { 484 if (map->pinning != PIN_GLOBAL_NS) 485 continue; 486 off = (void*)map - ctx->map_tab->d_buf; 487 name = bpf_map_fetch_name(ctx, off); 488 if (!name) { 489 fprintf(stderr, "Count not fetch map name at off %lu!\n", off); 490 return -EIO; 491 } 492 ret = cb(ctx, map, name, exit); 493 if (ret) 494 break; 495 } 496 497 return ret; 498 } 499 500 static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx) 501 { 502 return ctx->sym_tab && ctx->str_tab && ctx->map_tab; 503 } 504 505 static int bpf_check_ancillary(struct bpf_elf_ctx *ctx, bpf_handle_state_t cb, 506 int exit) 507 { 508 struct bpf_elf_sec_data data; 509 int i, ret = 0; 510 511 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 512 ret = bpf_fill_section_data(ctx, i, &data); 513 if (ret < 0) 514 continue; 515 if (data.sec_hdr.sh_type == SHT_PROGBITS && 516 !strcmp(data.sec_name, ELF_SECTION_MAPS)) 517 ret = bpf_fetch_maps_begin(ctx, i, &data); 518 else if (data.sec_hdr.sh_type == SHT_SYMTAB && 519 !strcmp(data.sec_name, ".symtab")) 520 ret = bpf_fetch_symtab(ctx, i, &data); 521 else if (data.sec_hdr.sh_type == SHT_STRTAB && 522 !strcmp(data.sec_name, ".strtab")) 523 ret = bpf_fetch_strtab(ctx, i, &data); 524 if (ret < 0) { 525 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n", 526 i); 527 return ret; 528 } 529 } 530 531 if (bpf_has_map_data(ctx)) { 532 ret = bpf_fetch_maps_end(ctx, cb, exit); 533 if (ret < 0) { 534 fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n"); 535 return ret; 536 } 537 } 538 539 return ret; 540 } 541 542 static int migrate_state(const char *pathname, bpf_handle_state_t cb, int exit) 543 { 544 struct bpf_elf_ctx ctx = {}; 545 int ret; 546 547 ret = bpf_elf_init(&ctx, pathname); 548 if (!ret) { 549 ret = bpf_check_ancillary(&ctx, cb, exit); 550 bpf_elf_close(&ctx); 551 } 552 return ret; 553 } 554 555 int main(int argc, char **argv) 556 { 557 const char *pathname = NULL; 558 bpf_handle_state_t fn = NULL; 559 int opt, exit = 0; 560 561 fs_base_init(); 562 563 openlog("cilium-map-migrate", LOG_NDELAY, 0); 564 while ((opt = getopt(argc, argv, "s:e:r:")) != -1) { 565 switch (opt) { 566 case 's': 567 case 'e': 568 pathname = optarg; 569 fn = opt == 's' ? 570 bpf_handle_pending : 571 bpf_handle_finalize; 572 break; 573 case 'r': 574 exit = atoi(optarg); 575 break; 576 default: 577 return -1; 578 } 579 } 580 581 if (fn == NULL) 582 return -1; 583 584 exit = pathname ? migrate_state(pathname, fn, exit) : -1; 585 closelog(); 586 return exit; 587 }