github.com/datadog/cilium@v1.6.12/bpf/cilium-map-migrate.c (about)

     1  /*
     2   *  Copyright (C) 2017 Authors of Cilium
     3   *
     4   *  This program is free software; you can redistribute it and/or modify
     5   *  it under the terms of the GNU General Public License as published by
     6   *  the Free Software Foundation; either version 2 of the License, or
     7   *  (at your option) any later version.
     8   *
     9   *  This program is distributed in the hope that it will be useful,
    10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
    11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    12   *  GNU General Public License for more details.
    13   *
    14   *  You should have received a copy of the GNU General Public License
    15   *  along with this program; if not, write to the Free Software
    16   *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
    17   *
    18   *  Parts from iproute2 bpf.c loader code:
    19   *
    20   *  This program is free software; you can distribute it and/or
    21   *  modify it under the terms of the GNU General Public License
    22   *  as published by the Free Software Foundation; either version
    23   *  2 of the License, or (at your option) any later version.
    24   *
    25   *  Authors:
    26   *
    27   *    Daniel Borkmann <daniel@iogearbox.net>
    28   *    Jiri Pirko <jiri@resnulli.us>
    29   *    Alexei Starovoitov <ast@kernel.org>
    30   */
    31  
    32  #include <stdio.h>
    33  #include <syslog.h>
    34  #include <stdlib.h>
    35  #include <unistd.h>
    36  #include <stdbool.h>
    37  #include <errno.h>
    38  #include <fcntl.h>
    39  #include <string.h>
    40  #include <limits.h>
    41  
    42  #include <sys/syscall.h>
    43  #include <sys/stat.h>
    44  
    45  #include <arpa/inet.h>
    46  
    47  #include <linux/bpf.h>
    48  
    49  #include "elf/libelf.h"
    50  #include "elf/gelf.h"
    51  
    52  #include "iproute2/bpf_elf.h"
    53  
    54  #ifndef EM_BPF
    55  # define EM_BPF		247
    56  #endif
    57  
    58  #define ELF_MAX_MAPS	64
    59  
    60  #define STATE_PENDING	"pending"
    61  
    62  #define BPF_ENV_MNT "CILIUM_BPF_MNT"
    63  
    64  struct bpf_elf_sec_data {
    65  	GElf_Shdr	sec_hdr;
    66  	Elf_Data	*sec_data;
    67  	const char	*sec_name;
    68  };
    69  
    70  struct bpf_elf_ctx {
    71  	GElf_Ehdr	elf_hdr;
    72  	Elf		*elf_fd;
    73  	Elf_Data	*sym_tab;
    74  	Elf_Data	*str_tab;
    75  	Elf_Data	*map_tab;
    76  	int		map_len;
    77  	int		map_num;
    78  	int		map_sec;
    79  	int		sym_num;
    80  	int		obj_fd;
    81  };
    82  
    83  static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
    84  {
    85  #ifndef __NR_bpf
    86  # if defined(__i386__)
    87  #  define __NR_bpf 357
    88  # elif defined(__x86_64__)
    89  #  define __NR_bpf 321
    90  # elif defined(__aarch64__)
    91  #  define __NR_bpf 280
    92  # else
    93  #  error __NR_bpf not defined.
    94  # endif
    95  #endif
    96  	return syscall(__NR_bpf, cmd, attr, size);
    97  }
    98  
    99  static int renameat2(int dfd1, const char *path1,
   100  		     int dfd2, const char *path2,
   101  		     unsigned int flags)
   102  {
   103  #ifndef __NR_renameat2
   104  # if defined(__i386__)
   105  #  define __NR_renameat2 353
   106  # elif defined(__x86_64__)
   107  #  define __NR_renameat2 316
   108  # elif defined(__aarch64__)
   109  #  define __NR_renameat2 276
   110  # else
   111  #  error __NR_renameat2 not defined.
   112  # endif
   113  #endif
   114  	return syscall(__NR_renameat2, dfd1, path1, dfd2, path2, flags);
   115  }
   116  
   117  static inline __u64 bpf_ptr_to_u64(const void *ptr)
   118  {
   119  	return (__u64)(unsigned long)ptr;
   120  }
   121  
   122  static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
   123  {
   124  	if (ctx->elf_hdr.e_type != ET_REL ||
   125  	    (ctx->elf_hdr.e_machine != EM_NONE &&
   126  	     ctx->elf_hdr.e_machine != EM_BPF) ||
   127  	    ctx->elf_hdr.e_version != EV_CURRENT) {
   128  		fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
   129  		return -EINVAL;
   130  	}
   131  
   132  	switch (ctx->elf_hdr.e_ident[EI_DATA]) {
   133  	default:
   134  		fprintf(stderr, "ELF format error, wrong endianness info?\n");
   135  		return -EINVAL;
   136  	case ELFDATA2LSB:
   137  		if (htons(1) == 1) {
   138  			fprintf(stderr,
   139  				"We are big endian, eBPF object is little endian!\n");
   140  			return -EIO;
   141  		}
   142  		break;
   143  	case ELFDATA2MSB:
   144  		if (htons(1) != 1) {
   145  			fprintf(stderr,
   146  				"We are little endian, eBPF object is big endian!\n");
   147  			return -EIO;
   148  		}
   149  		break;
   150  	}
   151  
   152  	return 0;
   153  }
   154  
   155  static int bpf_elf_init(struct bpf_elf_ctx *ctx, const char *pathname)
   156  {
   157  	int ret;
   158  
   159  	if (elf_version(EV_CURRENT) == EV_NONE)
   160  		return -EIO;
   161  	ctx->obj_fd = open(pathname, O_RDONLY);
   162  	if (ctx->obj_fd < 0)
   163  		return ctx->obj_fd;
   164  	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
   165  	if (!ctx->elf_fd) {
   166  		ret = -EINVAL;
   167  		goto out_fd;
   168  	}
   169  	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
   170  		ret = -EINVAL;
   171  		goto out_fd;
   172  	}
   173  	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
   174  	    &ctx->elf_hdr) {
   175  		ret = -EIO;
   176  		goto out_elf;
   177  	}
   178  	ret = bpf_elf_check_ehdr(ctx);
   179  	if (ret < 0)
   180  		goto out_elf;
   181  	return 0;
   182  out_elf:
   183  	elf_end(ctx->elf_fd);
   184  out_fd:
   185  	close(ctx->obj_fd);
   186  	return ret;
   187  }
   188  
   189  static void bpf_elf_close(struct bpf_elf_ctx *ctx)
   190  {
   191  	elf_end(ctx->elf_fd);
   192  	close(ctx->obj_fd);
   193  }
   194  
   195  static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
   196  				    const GElf_Sym *sym)
   197  {
   198  	return ctx->str_tab->d_buf + sym->st_name;
   199  }
   200  
   201  static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
   202  {
   203  	GElf_Sym sym;
   204  	int off, i;
   205  
   206  	for (off = 0; off < end; off += ctx->map_len) {
   207  		/* Order doesn't need to be linear here, hence we walk
   208  		 * the table again.
   209  		 */
   210  		for (i = 0; i < ctx->sym_num; i++) {
   211  			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
   212  				continue;
   213  			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
   214  			    !(GELF_ST_TYPE(sym.st_info) == STT_NOTYPE ||
   215  			      GELF_ST_TYPE(sym.st_info) == STT_OBJECT) ||
   216  			    sym.st_shndx != ctx->map_sec)
   217  				continue;
   218  			if (sym.st_value == off)
   219  				break;
   220  			if (i == ctx->sym_num - 1)
   221  				return -1;
   222  		}
   223  	}
   224  
   225  	return off == end ? 0 : -1;
   226  }
   227  
   228  static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, unsigned long off)
   229  {
   230  	GElf_Sym sym;
   231  	int i;
   232  
   233  	for (i = 0; i < ctx->sym_num; i++) {
   234  		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
   235  			continue;
   236  
   237  		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
   238  		    !(GELF_ST_TYPE(sym.st_info) == STT_NOTYPE ||
   239  		      GELF_ST_TYPE(sym.st_info) == STT_OBJECT) ||
   240  		    sym.st_shndx != ctx->map_sec ||
   241  		    sym.st_value != off)
   242  			continue;
   243  		return bpf_str_tab_name(ctx, &sym);
   244  	}
   245  
   246  	return NULL;
   247  }
   248  
   249  static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
   250  {
   251  	int i, num = 0;
   252  	GElf_Sym sym;
   253  
   254  	for (i = 0; i < ctx->sym_num; i++) {
   255  		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
   256  			continue;
   257  
   258  		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
   259  		    !(GELF_ST_TYPE(sym.st_info) == STT_NOTYPE ||
   260  		      GELF_ST_TYPE(sym.st_info) == STT_OBJECT) ||
   261  		    sym.st_shndx != ctx->map_sec)
   262  			continue;
   263  		num++;
   264  	}
   265  
   266  	return num;
   267  }
   268  
   269  static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map)
   270  {
   271  	char file[PATH_MAX], buff[256];
   272  	unsigned int val;
   273  	FILE *fp;
   274  
   275  	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
   276  	memset(map, 0, sizeof(*map));
   277  
   278  	fp = fopen(file, "r");
   279  	if (!fp) {
   280  		fprintf(stderr, "No procfs support?!\n");
   281  		return -EIO;
   282  	}
   283  
   284  	while (fgets(buff, sizeof(buff), fp)) {
   285  		if (sscanf(buff, "map_type:\t%u", &val) == 1)
   286  			map->type = val;
   287  		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
   288  			map->size_key = val;
   289  		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
   290  			map->size_value = val;
   291  		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
   292  			map->max_elem = val;
   293  		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
   294  			map->flags = val;
   295  	}
   296  
   297  	fclose(fp);
   298  	return 0;
   299  }
   300  
   301  static int bpf_obj_get(const char *pathname)
   302  {
   303  	union bpf_attr attr = {};
   304  
   305  	attr.pathname = bpf_ptr_to_u64(pathname);
   306  	return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
   307  }
   308  
   309  typedef int (*bpf_handle_state_t)(struct bpf_elf_ctx *ctx,
   310  				  const struct bpf_elf_map *map,
   311  				  const char *name, int exit);
   312  
   313  char fs_base[PATH_MAX + 1];
   314  
   315  void fs_base_init()
   316  {
   317  	const char *mnt_env = getenv(BPF_ENV_MNT);
   318  
   319  	if (mnt_env) {
   320  		snprintf(fs_base, sizeof(fs_base), "%s/tc/globals", mnt_env);
   321  	} else {
   322  		strcpy(fs_base, "/sys/fs/bpf/tc/globals");
   323  	}
   324  }
   325  
   326  static int bpf_handle_pending(struct bpf_elf_ctx *ctx,
   327  			      const struct bpf_elf_map *map,
   328  			      const char *name, int exit)
   329  {
   330  	char file[PATH_MAX + 1], dest[PATH_MAX + 1];
   331  	struct bpf_elf_map pinned;
   332  	struct stat sb;
   333  	int fd, ret;
   334  
   335  	snprintf(file, sizeof(file), "%s/%s", fs_base, name);
   336  	ret = stat(file, &sb);
   337  	if (ret < 0) {
   338  		if (errno == ENOENT)
   339  			return 0;
   340  		fprintf(stderr, "Cannot stat node %s!\n", file);
   341  		return -errno;
   342  	}
   343  
   344  	fd = bpf_obj_get(file);
   345  	if (fd < 0) {
   346  		fprintf(stderr, "Cannot open pinned node %s!\n", file);
   347  		return -errno;
   348  	}
   349  	ret = bpf_derive_elf_map_from_fdinfo(fd, &pinned);
   350  	close(fd);
   351  	if (ret < 0) {
   352  		fprintf(stderr, "Cannot fetch fdinfo from %s!\n", file);
   353  		return ret;
   354  	}
   355  
   356  	pinned.id = map->id;
   357          pinned.pinning = map->pinning;
   358  	if (!memcmp(map, &pinned, sizeof(pinned)))
   359  		return 0;
   360  
   361  	snprintf(dest, sizeof(dest), "%s:%s", file, STATE_PENDING);
   362  	syslog(LOG_WARNING, "Property mismatch in %s, migrating node to %s!\n",
   363  	       file, dest);
   364  	utimensat(AT_FDCWD, file, NULL, 0);
   365  	return rename(file, dest);
   366  }
   367  
   368  static int bpf_handle_finalize(struct bpf_elf_ctx *ctx,
   369  			       const struct bpf_elf_map *map,
   370  			       const char *name, int exit)
   371  {
   372  	char file[PATH_MAX + 1], dest[PATH_MAX + 1];
   373  	struct stat sb;
   374  	int ret;
   375  
   376  	snprintf(file, sizeof(file), "%s/%s:%s", fs_base, name,
   377  		 STATE_PENDING);
   378  	ret = stat(file, &sb);
   379  	if (ret < 0) {
   380  		if (errno == ENOENT)
   381  			return 0;
   382  		fprintf(stderr, "Cannot stat node %s!\n", file);
   383  		return -errno;
   384  	}
   385  
   386  	if (exit) {
   387  		snprintf(dest, sizeof(dest), "%s/%s", fs_base, name);
   388  		syslog(LOG_WARNING, "Restoring migrated node %s into %s due to bad exit.\n",
   389  		       file, dest);
   390  		utimensat(AT_FDCWD, file, NULL, 0);
   391  		renameat2(AT_FDCWD, file, AT_FDCWD, dest, 1);
   392  		return 0;
   393  	} else {
   394  		syslog(LOG_WARNING, "Unlinking migrated node %s due to good exit.\n",
   395  		       file);
   396  		return unlink(file);
   397  	}
   398  }
   399  
   400  static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
   401  				 struct bpf_elf_sec_data *data)
   402  {
   403  	Elf_Data *sec_edata;
   404  	GElf_Shdr sec_hdr;
   405  	Elf_Scn *sec_fd;
   406  	char *sec_name;
   407  
   408  	memset(data, 0, sizeof(*data));
   409  
   410  	sec_fd = elf_getscn(ctx->elf_fd, section);
   411  	if (!sec_fd)
   412  		return -EINVAL;
   413  	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
   414  		return -EIO;
   415  
   416  	sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
   417  			      sec_hdr.sh_name);
   418  	if (!sec_name || !sec_hdr.sh_size)
   419  		return -ENOENT;
   420  
   421  	sec_edata = elf_getdata(sec_fd, NULL);
   422  	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
   423  		return -EIO;
   424  
   425  	memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
   426  
   427  	data->sec_name = sec_name;
   428  	data->sec_data = sec_edata;
   429  	return 0;
   430  }
   431  
   432  static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
   433  			    struct bpf_elf_sec_data *data)
   434  {
   435  	ctx->sym_tab = data->sec_data;
   436  	ctx->sym_num = data->sec_hdr.sh_size /
   437  		       data->sec_hdr.sh_entsize;
   438  	return 0;
   439  }
   440  
   441  static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
   442  			    struct bpf_elf_sec_data *data)
   443  {
   444  	ctx->str_tab = data->sec_data;
   445  	return 0;
   446  }
   447  
   448  static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
   449  				struct bpf_elf_sec_data *data)
   450  {
   451  	ctx->map_tab = data->sec_data;
   452  	ctx->map_len = data->sec_data->d_size;
   453  	ctx->map_sec = section;
   454  	return 0;
   455  }
   456  
   457  static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx, bpf_handle_state_t cb,
   458  			      int exit)
   459  {
   460  	int i, ret = 0, sym_num = bpf_map_num_sym(ctx);
   461  	struct bpf_elf_map *map;
   462  	unsigned long off;
   463  	const char *name;
   464  
   465  	if (sym_num == 0 || sym_num > 64) {
   466  		fprintf(stderr, "%u maps not supported in current map section!\n",
   467  			sym_num);
   468  		return -EINVAL;
   469  	}
   470  
   471  	if (ctx->map_len != sym_num * sizeof(struct bpf_elf_map)) {
   472  		fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
   473  		return -EINVAL;
   474  	}
   475  
   476  	ctx->map_len /= sym_num;
   477  	if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
   478  		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
   479  		return -EINVAL;
   480  	}
   481  
   482  	ctx->map_num = sym_num;
   483  	for (i = 0, map = ctx->map_tab->d_buf; i < sym_num; i++, map++) {
   484  		if (map->pinning != PIN_GLOBAL_NS)
   485  			continue;
   486  		off = (void*)map - ctx->map_tab->d_buf;
   487  		name = bpf_map_fetch_name(ctx, off);
   488  		if (!name) {
   489  			fprintf(stderr, "Count not fetch map name at off %lu!\n", off);
   490  			return -EIO;
   491  		}
   492  		ret = cb(ctx, map, name, exit);
   493  		if (ret)
   494  			break;
   495  	}
   496  
   497  	return ret;
   498  }
   499  
   500  static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
   501  {
   502  	return ctx->sym_tab && ctx->str_tab && ctx->map_tab;
   503  }
   504  
   505  static int bpf_check_ancillary(struct bpf_elf_ctx *ctx, bpf_handle_state_t cb,
   506  			       int exit)
   507  {
   508  	struct bpf_elf_sec_data data;
   509  	int i, ret = 0;
   510  
   511  	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
   512  		ret = bpf_fill_section_data(ctx, i, &data);
   513  		if (ret < 0)
   514  			continue;
   515  		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
   516  		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
   517  			ret = bpf_fetch_maps_begin(ctx, i, &data);
   518  		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
   519  			 !strcmp(data.sec_name, ".symtab"))
   520  			ret = bpf_fetch_symtab(ctx, i, &data);
   521  		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
   522  			 !strcmp(data.sec_name, ".strtab"))
   523  			ret = bpf_fetch_strtab(ctx, i, &data);
   524  		if (ret < 0) {
   525  			fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
   526  				i);
   527  			return ret;
   528  		}
   529  	}
   530  
   531  	if (bpf_has_map_data(ctx)) {
   532  		ret = bpf_fetch_maps_end(ctx, cb, exit);
   533  		if (ret < 0) {
   534  			fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
   535  			return ret;
   536  		}
   537  	}
   538  
   539  	return ret;
   540  }
   541  
   542  static int migrate_state(const char *pathname, bpf_handle_state_t cb, int exit)
   543  {
   544  	struct bpf_elf_ctx ctx = {};
   545  	int ret;
   546  
   547  	ret = bpf_elf_init(&ctx, pathname);
   548  	if (!ret) {
   549  		ret = bpf_check_ancillary(&ctx, cb, exit);
   550  		bpf_elf_close(&ctx);
   551  	}
   552  	return ret;
   553  }
   554  
   555  int main(int argc, char **argv)
   556  {
   557  	const char *pathname = NULL;
   558  	bpf_handle_state_t fn = NULL;
   559  	int opt, exit = 0;
   560  
   561  	fs_base_init();
   562  
   563  	openlog("cilium-map-migrate", LOG_NDELAY, 0);
   564  	while ((opt = getopt(argc, argv, "s:e:r:")) != -1) {
   565  		switch (opt) {
   566  		case 's':
   567  		case 'e':
   568  			pathname = optarg;
   569  			fn = opt == 's' ?
   570  			     bpf_handle_pending :
   571  			     bpf_handle_finalize;
   572  			break;
   573  		case 'r':
   574  			exit = atoi(optarg);
   575  			break;
   576  		default:
   577  			return -1;
   578  		}
   579  	}
   580  
   581  	if (fn == NULL)
   582  		return -1;
   583  
   584  	exit = pathname ? migrate_state(pathname, fn, exit) : -1;
   585  	closelog();
   586  	return exit;
   587  }