github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/c/headers/common/filesystem.h (about)

     1  #ifndef __COMMON_FILESYSTEM_H__
     2  #define __COMMON_FILESYSTEM_H__
     3  
     4  #include <vmlinux.h>
     5  #include <vmlinux_flavors.h>
     6  
     7  #include <common/buffer.h>
     8  #include <common/memory.h>
     9  #include <common/consts.h>
    10  
    11  // PROTOTYPES
    12  
    13  statfunc u64 get_time_nanosec_timespec(struct timespec64 *);
    14  statfunc u64 get_ctime_nanosec_from_inode(struct inode *);
    15  statfunc struct dentry *get_mnt_root_ptr_from_vfsmnt(struct vfsmount *);
    16  statfunc struct dentry *get_d_parent_ptr_from_dentry(struct dentry *);
    17  statfunc struct qstr get_d_name_from_dentry(struct dentry *);
    18  statfunc dev_t get_dev_from_file(struct file *);
    19  statfunc unsigned long get_inode_nr_from_file(struct file *);
    20  statfunc u64 get_ctime_nanosec_from_file(struct file *);
    21  statfunc unsigned short get_inode_mode_from_file(struct file *);
    22  statfunc struct path get_path_from_file(struct file *);
    23  statfunc struct file *get_struct_file_from_fd(u64);
    24  statfunc unsigned short get_inode_mode_from_fd(u64);
    25  statfunc int check_fd_type(u64, u16);
    26  statfunc unsigned long get_inode_nr_from_dentry(struct dentry *);
    27  statfunc dev_t get_dev_from_dentry(struct dentry *);
    28  statfunc u64 get_ctime_nanosec_from_dentry(struct dentry *);
    29  statfunc size_t get_path_str_buf(struct path *, buf_t *);
    30  statfunc void *get_path_str(struct path *);
    31  statfunc file_id_t get_file_id(struct file *);
    32  statfunc void *get_path_str_cached(struct file *);
    33  statfunc void *get_dentry_path_str(struct dentry *);
    34  statfunc file_info_t get_file_info(struct file *);
    35  statfunc struct inode *get_inode_from_file(struct file *);
    36  statfunc int get_standard_fds_from_struct_file(struct file *);
    37  statfunc struct super_block *get_super_block_from_inode(struct inode *);
    38  statfunc unsigned long get_s_magic_from_super_block(struct super_block *);
    39  statfunc void fill_vfs_file_metadata(struct file *, u32, u8 *);
    40  statfunc void fill_vfs_file_bin_args_io_data(io_data_t, bin_args_t *);
    41  statfunc void fill_file_header(u8[FILE_MAGIC_HDR_SIZE], io_data_t);
    42  statfunc void
    43  fill_vfs_file_bin_args(u32, struct file *, loff_t *, io_data_t, size_t, int, bin_args_t *);
    44  
    45  // FUNCTIONS
    46  
    47  statfunc u64 get_time_nanosec_timespec(struct timespec64 *ts)
    48  {
    49      time64_t sec = BPF_CORE_READ(ts, tv_sec);
    50      if (sec < 0)
    51          return 0;
    52  
    53      long ns = BPF_CORE_READ(ts, tv_nsec);
    54  
    55      return (sec * 1000000000L) + ns;
    56  }
    57  
    58  statfunc u64 get_ctime_nanosec_from_inode(struct inode *inode)
    59  {
    60      struct timespec64 ts;
    61      if (bpf_core_field_exists(inode->__i_ctime)) { // Version >= 6.6
    62          ts = BPF_CORE_READ(inode, __i_ctime);
    63      } else {
    64          struct inode___older_v66 *old_inode = (void *) inode;
    65          ts = BPF_CORE_READ(old_inode, i_ctime);
    66      }
    67      return get_time_nanosec_timespec(&ts);
    68  }
    69  
    70  statfunc struct dentry *get_mnt_root_ptr_from_vfsmnt(struct vfsmount *vfsmnt)
    71  {
    72      return BPF_CORE_READ(vfsmnt, mnt_root);
    73  }
    74  
    75  statfunc struct dentry *get_d_parent_ptr_from_dentry(struct dentry *dentry)
    76  {
    77      return BPF_CORE_READ(dentry, d_parent);
    78  }
    79  
    80  statfunc struct qstr get_d_name_from_dentry(struct dentry *dentry)
    81  {
    82      return BPF_CORE_READ(dentry, d_name);
    83  }
    84  
    85  statfunc dev_t get_dev_from_file(struct file *file)
    86  {
    87      return BPF_CORE_READ(file, f_inode, i_sb, s_dev);
    88  }
    89  
    90  statfunc unsigned long get_inode_nr_from_file(struct file *file)
    91  {
    92      return BPF_CORE_READ(file, f_inode, i_ino);
    93  }
    94  
    95  statfunc u64 get_ctime_nanosec_from_file(struct file *file)
    96  {
    97      struct inode *f_inode = BPF_CORE_READ(file, f_inode);
    98      return get_ctime_nanosec_from_inode(f_inode);
    99  }
   100  
   101  statfunc unsigned short get_inode_mode_from_file(struct file *file)
   102  {
   103      return BPF_CORE_READ(file, f_inode, i_mode);
   104  }
   105  
   106  statfunc struct path get_path_from_file(struct file *file)
   107  {
   108      return BPF_CORE_READ(file, f_path);
   109  }
   110  
   111  statfunc struct file *get_struct_file_from_fd(u64 fd_num)
   112  {
   113      struct task_struct *task = (struct task_struct *) bpf_get_current_task();
   114      if (task == NULL)
   115          return NULL;
   116  
   117      struct file **files = BPF_CORE_READ(task, files, fdt, fd);
   118      if (files == NULL)
   119          return NULL;
   120  
   121      struct file *file;
   122      bpf_core_read(&file, sizeof(void *), &files[fd_num]);
   123      if (file == NULL)
   124          return NULL;
   125  
   126      return file;
   127  }
   128  
   129  statfunc unsigned short get_inode_mode_from_fd(u64 fd)
   130  {
   131      struct file *f = get_struct_file_from_fd(fd);
   132      if (f == NULL) {
   133          return -1;
   134      }
   135  
   136      return BPF_CORE_READ(f, f_inode, i_mode);
   137  }
   138  
   139  statfunc int check_fd_type(u64 fd, u16 type)
   140  {
   141      unsigned short i_mode = get_inode_mode_from_fd(fd);
   142  
   143      if ((i_mode & S_IFMT) == type) {
   144          return 1;
   145      }
   146  
   147      return 0;
   148  }
   149  
   150  statfunc unsigned long get_inode_nr_from_dentry(struct dentry *dentry)
   151  {
   152      return BPF_CORE_READ(dentry, d_inode, i_ino);
   153  }
   154  
   155  statfunc dev_t get_dev_from_dentry(struct dentry *dentry)
   156  {
   157      return BPF_CORE_READ(dentry, d_inode, i_sb, s_dev);
   158  }
   159  
   160  statfunc u64 get_ctime_nanosec_from_dentry(struct dentry *dentry)
   161  {
   162      struct inode *d_inode = BPF_CORE_READ(dentry, d_inode);
   163      return get_ctime_nanosec_from_inode(d_inode);
   164  }
   165  
// Read the file path to the given buffer, returning the start offset of the path.
//
// The path is assembled right-to-left: writing starts at the midpoint of the
// buffer (MAX_PERCPU_BUFSIZE >> 1) and each dentry name is placed immediately
// before the previously written one, so the finished string ends at index
// (MAX_PERCPU_BUFSIZE >> 1) - 1.
//
// Returns the offset into out_buf->buf at which the string begins. 0 is
// returned both when an argument is NULL and when no path component could be
// written (in which case the bare dentry name is stored at offset 0), so
// callers cannot tell those two cases apart from the return value alone.
statfunc size_t get_path_str_buf(struct path *path, buf_t *out_buf)
{
    if (path == NULL || out_buf == NULL) {
        return 0;
    }

    // Work on a local copy of the path structure.
    struct path f_path;
    bpf_probe_read_kernel(&f_path, sizeof(struct path), path);
    char slash = '/';
    int zero = 0;
    struct dentry *dentry = f_path.dentry;
    struct vfsmount *vfsmnt = f_path.mnt;
    struct mount *mnt_parent_p;
    // real_mount() is defined outside this file; it yields the struct mount
    // associated with the vfsmount.
    struct mount *mnt_p = real_mount(vfsmnt);
    bpf_core_read(&mnt_parent_p, sizeof(struct mount *), &mnt_p->mnt_parent);
    // Write cursor: always the index of the first byte written so far.
    u32 buf_off = (MAX_PERCPU_BUFSIZE >> 1);
    struct dentry *mnt_root;
    struct dentry *d_parent;
    struct qstr d_name;
    unsigned int len;
    unsigned int off;
    int sz;

    // Bounded walk towards the root; each iteration either emits one path
    // component or hops to the parent mount.
#pragma unroll
    for (int i = 0; i < MAX_PATH_COMPONENTS; i++) {
        mnt_root = get_mnt_root_ptr_from_vfsmnt(vfsmnt);
        d_parent = get_d_parent_ptr_from_dentry(dentry);
        if (dentry == mnt_root || dentry == d_parent) {
            if (dentry != mnt_root) {
                // We reached root, but not mount root - escaped?
                break;
            }
            if (mnt_p != mnt_parent_p) {
                // We reached root, but not global root - continue with mount point path
                // Note: this hop consumes one loop iteration without emitting a component.
                bpf_core_read(&dentry, sizeof(struct dentry *), &mnt_p->mnt_mountpoint);
                bpf_core_read(&mnt_p, sizeof(struct mount *), &mnt_p->mnt_parent);
                bpf_core_read(&mnt_parent_p, sizeof(struct mount *), &mnt_p->mnt_parent);
                vfsmnt = &mnt_p->mnt;
                continue;
            }
            // Global root - path fully parsed
            break;
        }
        // Add this dentry name to path
        d_name = get_d_name_from_dentry(dentry);
        // Name length plus its NUL terminator, masked to stay below MAX_STRING_SIZE.
        len = (d_name.len + 1) & (MAX_STRING_SIZE - 1);
        off = buf_off - len;
        // Is string buffer big enough for dentry name?
        sz = 0;
        if (off <= buf_off) { // verify no wrap occurred
            // The masks below keep length and index inside the lower half of the
            // buffer (presumably so the BPF verifier can prove the access is bounded).
            len = len & ((MAX_PERCPU_BUFSIZE >> 1) - 1);
            sz = bpf_probe_read_kernel_str(
                &(out_buf->buf[off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)]), len, (void *) d_name.name);
        } else
            break;
        if (sz > 1) {
            buf_off -= 1; // remove null byte termination with slash sign
            bpf_probe_read_kernel(&(out_buf->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
            // Move the cursor to the first character of the name just written.
            buf_off -= sz - 1;
        } else {
            // If sz is 0 or 1 we have an error (path can't be null nor an empty string)
            // (a negative sz, i.e. a failed read, also ends up here)
            break;
        }
        dentry = d_parent;
    }
    if (buf_off == (MAX_PERCPU_BUFSIZE >> 1)) {
        // memfd files have no path in the filesystem -> extract their name
        // (reached whenever no component was written; the name goes to the start of the buffer)
        buf_off = 0;
        d_name = get_d_name_from_dentry(dentry);
        bpf_probe_read_kernel_str(&(out_buf->buf[0]), MAX_STRING_SIZE, (void *) d_name.name);
    } else {
        // Add leading slash
        buf_off -= 1;
        bpf_probe_read_kernel(&(out_buf->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
        // Null terminate the path string
        // (this overwrites the slash that was appended after the last component)
        bpf_probe_read_kernel(&(out_buf->buf[(MAX_PERCPU_BUFSIZE >> 1) - 1]), 1, &zero);
    }
    return buf_off;
}
   246  
   247  statfunc void *get_path_str(struct path *path)
   248  {
   249      // Get per-cpu string buffer
   250      buf_t *string_p = get_buf(STRING_BUF_IDX);
   251      if (string_p == NULL)
   252          return NULL;
   253  
   254      size_t buf_off = get_path_str_buf(path, string_p);
   255      return &string_p->buf[buf_off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)];
   256  }
   257  
   258  statfunc file_id_t get_file_id(struct file *file)
   259  {
   260      file_id_t file_id = {};
   261      if (file != NULL) {
   262          file_id.ctime = get_ctime_nanosec_from_file(file);
   263          file_id.device = get_dev_from_file(file);
   264          file_id.inode = get_inode_nr_from_file(file);
   265      }
   266      return file_id;
   267  }
   268  
   269  // get_path_str_cached - get the path of a specific file, using and updating cache map.
   270  statfunc void *get_path_str_cached(struct file *file)
   271  {
   272      file_id_t file_id = get_file_id(file);
   273      path_buf_t *path = bpf_map_lookup_elem(&io_file_path_cache_map, &file_id);
   274      if (path == NULL) {
   275          // Get per-cpu string buffer
   276          buf_t *string_p = get_buf(STRING_BUF_IDX);
   277          if (string_p == NULL)
   278              return NULL;
   279  
   280          size_t buf_off = get_path_str_buf(__builtin_preserve_access_index(&file->f_path), string_p);
   281          if (likely(sizeof(string_p->buf) > buf_off + sizeof(path_buf_t))) {
   282              path = (path_buf_t *) (&string_p->buf[buf_off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)]);
   283              bpf_map_update_elem(&io_file_path_cache_map, &file_id, path, BPF_ANY);
   284          } else {
   285              return NULL;
   286          }
   287      }
   288      return &path->buf;
   289  }
   290  
// Build a path string for `dentry` by following d_parent links only (mount
// information is not consulted), writing into the per-CPU string buffer.
//
// The string is assembled right-to-left starting at the buffer midpoint
// (MAX_PERCPU_BUFSIZE >> 1). Returns a pointer to the first character of the
// result, or NULL when the per-CPU buffer is unavailable.
statfunc void *get_dentry_path_str(struct dentry *dentry)
{
    char slash = '/';
    int zero = 0;

    // Write cursor: always the index of the first byte written so far.
    u32 buf_off = (MAX_PERCPU_BUFSIZE >> 1);

    // Get per-cpu string buffer
    buf_t *string_p = get_buf(STRING_BUF_IDX);
    if (string_p == NULL)
        return NULL;

#pragma unroll
    for (int i = 0; i < MAX_PATH_COMPONENTS; i++) {
        struct dentry *d_parent = get_d_parent_ptr_from_dentry(dentry);
        // A dentry that is its own parent ends the walk.
        if (dentry == d_parent) {
            break;
        }
        // Add this dentry name to path
        struct qstr d_name = get_d_name_from_dentry(dentry);
        // Name length plus its NUL terminator, masked to stay below MAX_STRING_SIZE.
        unsigned int len = (d_name.len + 1) & (MAX_STRING_SIZE - 1);
        unsigned int off = buf_off - len;
        // Is string buffer big enough for dentry name?
        int sz = 0;
        if (off <= buf_off) { // verify no wrap occurred
            // Masks keep length and index inside the lower half of the buffer
            // (presumably so the BPF verifier can prove the access is bounded).
            len = len & ((MAX_PERCPU_BUFSIZE >> 1) - 1);
            sz = bpf_probe_read_kernel_str(
                &(string_p->buf[off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)]), len, (void *) d_name.name);
        } else
            break;
        if (sz > 1) {
            buf_off -= 1; // remove null byte termination with slash sign
            bpf_probe_read_kernel(&(string_p->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
            // Move the cursor to the first character of the name just written.
            buf_off -= sz - 1;
        } else {
            // If sz is 0 or 1 we have an error (path can't be null nor an empty string)
            // (a negative sz, i.e. a failed read, also ends up here)
            break;
        }
        dentry = d_parent;
    }

    if (buf_off == (MAX_PERCPU_BUFSIZE >> 1)) {
        // memfd files have no path in the filesystem -> extract their name
        // (reached whenever no component was written; the name goes to the start of the buffer)
        buf_off = 0;
        struct qstr d_name = get_d_name_from_dentry(dentry);
        bpf_probe_read_kernel_str(&(string_p->buf[0]), MAX_STRING_SIZE, (void *) d_name.name);
    } else {
        // Add leading slash
        buf_off -= 1;
        bpf_probe_read_kernel(&(string_p->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
        // Null terminate the path string
        // (this overwrites the slash that was appended after the last component)
        bpf_probe_read_kernel(&(string_p->buf[(MAX_PERCPU_BUFSIZE >> 1) - 1]), 1, &zero);
    }

    // NOTE(review): unlike get_path_str(), the returned index is not masked with
    // ((MAX_PERCPU_BUFSIZE >> 1) - 1) - confirm this is intentional.
    return &string_p->buf[buf_off];
}
   347  
   348  statfunc file_info_t get_file_info(struct file *file)
   349  {
   350      file_info_t file_info = {};
   351      if (file != NULL) {
   352          file_info.pathname_p = get_path_str(__builtin_preserve_access_index(&file->f_path));
   353          file_info.id = get_file_id(file);
   354      }
   355      return file_info;
   356  }
   357  
   358  statfunc struct inode *get_inode_from_file(struct file *file)
   359  {
   360      return BPF_CORE_READ(file, f_inode);
   361  }
   362  
   363  // Return which of the standard FDs point to the given file as a bit field.
   364  // The FDs matching bits are (1 << fd).
   365  statfunc int get_standard_fds_from_struct_file(struct file *file)
   366  {
   367      struct task_struct *task = (struct task_struct *) bpf_get_current_task();
   368      if (task == NULL) {
   369          return -1;
   370      }
   371      struct files_struct *files = (struct files_struct *) BPF_CORE_READ(task, files);
   372      if (files == NULL) {
   373          return -2;
   374      }
   375      struct file **fd = (struct file **) BPF_CORE_READ(files, fdt, fd);
   376      if (fd == NULL) {
   377          return -3;
   378      }
   379  
   380      int fds = 0;
   381  #pragma unroll
   382      for (int i = STDIN; i <= STDERR; i++) {
   383          struct file *fd_file = NULL;
   384          bpf_core_read(&fd_file, sizeof(struct file *), &fd[i]);
   385          if (fd_file == file) {
   386              fds |= 1 << i;
   387          }
   388      }
   389  
   390      return fds;
   391  }
   392  
   393  statfunc struct super_block *get_super_block_from_inode(struct inode *f_inode)
   394  {
   395      return BPF_CORE_READ(f_inode, i_sb);
   396  }
   397  
   398  statfunc unsigned long get_s_magic_from_super_block(struct super_block *i_sb)
   399  {
   400      return BPF_CORE_READ(i_sb, s_magic);
   401  }
   402  
   403  // INTERNAL: STRUCTS BUILDING
   404  // -----------------------------------------------------------------------
   405  
   406  statfunc void fill_vfs_file_metadata(struct file *file, u32 pid, u8 *metadata)
   407  {
   408      // Extract device id, inode number and mode
   409      dev_t s_dev = get_dev_from_file(file);
   410      unsigned long inode_nr = get_inode_nr_from_file(file);
   411      unsigned short i_mode = get_inode_mode_from_file(file);
   412  
   413      bpf_probe_read_kernel(metadata, 4, &s_dev);
   414      bpf_probe_read_kernel(metadata + 4, 8, &inode_nr);
   415      bpf_probe_read_kernel(metadata + 12, 4, &i_mode);
   416      bpf_probe_read_kernel(metadata + 16, 4, &pid);
   417  }
   418  
   419  statfunc void fill_vfs_file_bin_args_io_data(io_data_t io_data, bin_args_t *bin_args)
   420  {
   421      bin_args->ptr = io_data.ptr;
   422      bin_args->full_size = io_data.len;
   423  
   424      // handle case of write using iovec
   425      if (!io_data.is_buf && io_data.len > 0) {
   426          bin_args->vec = io_data.ptr;
   427          bin_args->iov_len = io_data.len;
   428          bin_args->iov_idx = 0;
   429          struct iovec io_vec;
   430          bpf_probe_read_kernel(&io_vec, sizeof(struct iovec), &bin_args->vec[0]);
   431          bin_args->ptr = io_vec.iov_base;
   432          bin_args->full_size = io_vec.iov_len;
   433      }
   434  }
   435  
   436  // Fill given bin_args_t argument with all needed information for vfs_file binary sending
   437  statfunc void fill_vfs_file_bin_args(u32 type,
   438                                       struct file *file,
   439                                       loff_t *pos,
   440                                       io_data_t io_data,
   441                                       size_t write_bytes,
   442                                       int pid,
   443                                       bin_args_t *bin_args)
   444  {
   445      off_t start_pos;
   446  
   447      bpf_probe_read_kernel(&start_pos, sizeof(off_t), pos);
   448  
   449      // Calculate write start offset
   450      if (start_pos != 0)
   451          start_pos -= write_bytes;
   452  
   453      bin_args->type = type;
   454      fill_vfs_file_metadata(file, pid, &bin_args->metadata[0]);
   455      bin_args->start_off = start_pos;
   456      fill_vfs_file_bin_args_io_data(io_data, bin_args);
   457  }
   458  
// Copy the leading bytes of the I/O payload into `header`.
//
// For a plain buffer the bytes come from io_data.ptr; for an iovec they come
// from the iov_base of the first iovec entry. The number of bytes copied is
// io_data.len, clamped to FILE_MAGIC_HDR_SIZE by the inline assembly below.
// Bytes of `header` beyond the copied length are not written by this function.
statfunc void fill_file_header(u8 header[FILE_MAGIC_HDR_SIZE], io_data_t io_data)
{
    u32 len = (u32) io_data.len;
    if (io_data.is_buf) {
        // inline bounds check to force compiler to use the register of len
        // (if len < FILE_MAGIC_HDR_SIZE it is kept, otherwise it is set to FILE_MAGIC_HDR_SIZE)
        asm volatile("if %[size] < %[max_size] goto +1;\n"
                     "%[size] = %[max_size];\n"
                     :
                     : [size] "r"(len), [max_size] "i"(FILE_MAGIC_HDR_SIZE));
        bpf_probe_read(header, len, io_data.ptr);
    } else {
        // Zero the local first so a failed read leaves iov_base NULL.
        struct iovec io_vec;
        __builtin_memset(&io_vec, 0, sizeof(io_vec));
        bpf_probe_read(&io_vec, sizeof(struct iovec), io_data.ptr);
        // inline bounds check to force compiler to use the register of len
        // NOTE(review): len still comes from io_data.len here, not from
        // io_vec.iov_len - confirm that sizing the copy this way is intended.
        asm volatile("if %[size] < %[max_size] goto +1;\n"
                     "%[size] = %[max_size];\n"
                     :
                     : [size] "r"(len), [max_size] "i"(FILE_MAGIC_HDR_SIZE));
        bpf_probe_read(header, len, io_vec.iov_base);
    }
}
   481  
   482  #endif