github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/c/headers/common/filesystem.h (about) 1 #ifndef __COMMON_FILESYSTEM_H__ 2 #define __COMMON_FILESYSTEM_H__ 3 4 #include <vmlinux.h> 5 #include <vmlinux_flavors.h> 6 7 #include <common/buffer.h> 8 #include <common/memory.h> 9 #include <common/consts.h> 10 11 // PROTOTYPES 12 13 statfunc u64 get_time_nanosec_timespec(struct timespec64 *); 14 statfunc u64 get_ctime_nanosec_from_inode(struct inode *); 15 statfunc struct dentry *get_mnt_root_ptr_from_vfsmnt(struct vfsmount *); 16 statfunc struct dentry *get_d_parent_ptr_from_dentry(struct dentry *); 17 statfunc struct qstr get_d_name_from_dentry(struct dentry *); 18 statfunc dev_t get_dev_from_file(struct file *); 19 statfunc unsigned long get_inode_nr_from_file(struct file *); 20 statfunc u64 get_ctime_nanosec_from_file(struct file *); 21 statfunc unsigned short get_inode_mode_from_file(struct file *); 22 statfunc struct path get_path_from_file(struct file *); 23 statfunc struct file *get_struct_file_from_fd(u64); 24 statfunc unsigned short get_inode_mode_from_fd(u64); 25 statfunc int check_fd_type(u64, u16); 26 statfunc unsigned long get_inode_nr_from_dentry(struct dentry *); 27 statfunc dev_t get_dev_from_dentry(struct dentry *); 28 statfunc u64 get_ctime_nanosec_from_dentry(struct dentry *); 29 statfunc size_t get_path_str_buf(struct path *, buf_t *); 30 statfunc void *get_path_str(struct path *); 31 statfunc file_id_t get_file_id(struct file *); 32 statfunc void *get_path_str_cached(struct file *); 33 statfunc void *get_dentry_path_str(struct dentry *); 34 statfunc file_info_t get_file_info(struct file *); 35 statfunc struct inode *get_inode_from_file(struct file *); 36 statfunc int get_standard_fds_from_struct_file(struct file *); 37 statfunc struct super_block *get_super_block_from_inode(struct inode *); 38 statfunc unsigned long get_s_magic_from_super_block(struct super_block *); 39 statfunc void fill_vfs_file_metadata(struct file *, 
u32, u8 *); 40 statfunc void fill_vfs_file_bin_args_io_data(io_data_t, bin_args_t *); 41 statfunc void fill_file_header(u8[FILE_MAGIC_HDR_SIZE], io_data_t); 42 statfunc void 43 fill_vfs_file_bin_args(u32, struct file *, loff_t *, io_data_t, size_t, int, bin_args_t *); 44 45 // FUNCTIONS 46 47 statfunc u64 get_time_nanosec_timespec(struct timespec64 *ts) 48 { 49 time64_t sec = BPF_CORE_READ(ts, tv_sec); 50 if (sec < 0) 51 return 0; 52 53 long ns = BPF_CORE_READ(ts, tv_nsec); 54 55 return (sec * 1000000000L) + ns; 56 } 57 58 statfunc u64 get_ctime_nanosec_from_inode(struct inode *inode) 59 { 60 struct timespec64 ts; 61 if (bpf_core_field_exists(inode->__i_ctime)) { // Version >= 6.6 62 ts = BPF_CORE_READ(inode, __i_ctime); 63 } else { 64 struct inode___older_v66 *old_inode = (void *) inode; 65 ts = BPF_CORE_READ(old_inode, i_ctime); 66 } 67 return get_time_nanosec_timespec(&ts); 68 } 69 70 statfunc struct dentry *get_mnt_root_ptr_from_vfsmnt(struct vfsmount *vfsmnt) 71 { 72 return BPF_CORE_READ(vfsmnt, mnt_root); 73 } 74 75 statfunc struct dentry *get_d_parent_ptr_from_dentry(struct dentry *dentry) 76 { 77 return BPF_CORE_READ(dentry, d_parent); 78 } 79 80 statfunc struct qstr get_d_name_from_dentry(struct dentry *dentry) 81 { 82 return BPF_CORE_READ(dentry, d_name); 83 } 84 85 statfunc dev_t get_dev_from_file(struct file *file) 86 { 87 return BPF_CORE_READ(file, f_inode, i_sb, s_dev); 88 } 89 90 statfunc unsigned long get_inode_nr_from_file(struct file *file) 91 { 92 return BPF_CORE_READ(file, f_inode, i_ino); 93 } 94 95 statfunc u64 get_ctime_nanosec_from_file(struct file *file) 96 { 97 struct inode *f_inode = BPF_CORE_READ(file, f_inode); 98 return get_ctime_nanosec_from_inode(f_inode); 99 } 100 101 statfunc unsigned short get_inode_mode_from_file(struct file *file) 102 { 103 return BPF_CORE_READ(file, f_inode, i_mode); 104 } 105 106 statfunc struct path get_path_from_file(struct file *file) 107 { 108 return BPF_CORE_READ(file, f_path); 109 } 110 111 statfunc 
struct file *get_struct_file_from_fd(u64 fd_num) 112 { 113 struct task_struct *task = (struct task_struct *) bpf_get_current_task(); 114 if (task == NULL) 115 return NULL; 116 117 struct file **files = BPF_CORE_READ(task, files, fdt, fd); 118 if (files == NULL) 119 return NULL; 120 121 struct file *file; 122 bpf_core_read(&file, sizeof(void *), &files[fd_num]); 123 if (file == NULL) 124 return NULL; 125 126 return file; 127 } 128 129 statfunc unsigned short get_inode_mode_from_fd(u64 fd) 130 { 131 struct file *f = get_struct_file_from_fd(fd); 132 if (f == NULL) { 133 return -1; 134 } 135 136 return BPF_CORE_READ(f, f_inode, i_mode); 137 } 138 139 statfunc int check_fd_type(u64 fd, u16 type) 140 { 141 unsigned short i_mode = get_inode_mode_from_fd(fd); 142 143 if ((i_mode & S_IFMT) == type) { 144 return 1; 145 } 146 147 return 0; 148 } 149 150 statfunc unsigned long get_inode_nr_from_dentry(struct dentry *dentry) 151 { 152 return BPF_CORE_READ(dentry, d_inode, i_ino); 153 } 154 155 statfunc dev_t get_dev_from_dentry(struct dentry *dentry) 156 { 157 return BPF_CORE_READ(dentry, d_inode, i_sb, s_dev); 158 } 159 160 statfunc u64 get_ctime_nanosec_from_dentry(struct dentry *dentry) 161 { 162 struct inode *d_inode = BPF_CORE_READ(dentry, d_inode); 163 return get_ctime_nanosec_from_inode(d_inode); 164 } 165 166 // Read the file path to the given buffer, returning the start offset of the path. 
// get_path_str_buf builds the path back to front: starting at offset
// MAX_PERCPU_BUFSIZE >> 1 of out_buf->buf, each component name is copied just
// before the previously written one, walking from the path's dentry towards
// the root and hopping to the parent mount whenever a mount root is reached.
// At most MAX_PATH_COMPONENTS loop iterations are performed.
//
// Returns the offset inside out_buf->buf at which the resulting string starts.
// 0 is returned when either argument is NULL (nothing is written in that case)
// and also when no component could be resolved, in which case the dentry name
// alone is copied to the start of the buffer.
statfunc size_t get_path_str_buf(struct path *path, buf_t *out_buf)
{
    if (path == NULL || out_buf == NULL) {
        return 0;
    }

    // Work on a local copy of the path structure
    struct path f_path;
    bpf_probe_read_kernel(&f_path, sizeof(struct path), path);
    // Single-byte sources for the separator and the terminator written below
    char slash = '/';
    int zero = 0;
    struct dentry *dentry = f_path.dentry;
    struct vfsmount *vfsmnt = f_path.mnt;
    struct mount *mnt_parent_p;
    // real_mount() is defined outside this file; presumably it maps the vfsmount
    // to its containing struct mount - TODO confirm
    struct mount *mnt_p = real_mount(vfsmnt);
    bpf_core_read(&mnt_parent_p, sizeof(struct mount *), &mnt_p->mnt_parent);
    // Writing starts in the middle of the buffer and proceeds towards offset 0
    u32 buf_off = (MAX_PERCPU_BUFSIZE >> 1);
    struct dentry *mnt_root;
    struct dentry *d_parent;
    struct qstr d_name;
    unsigned int len;
    unsigned int off;
    int sz;

#pragma unroll
    for (int i = 0; i < MAX_PATH_COMPONENTS; i++) {
        mnt_root = get_mnt_root_ptr_from_vfsmnt(vfsmnt);
        d_parent = get_d_parent_ptr_from_dentry(dentry);
        if (dentry == mnt_root || dentry == d_parent) {
            if (dentry != mnt_root) {
                // We reached root, but not mount root - escaped?
                break;
            }
            if (mnt_p != mnt_parent_p) {
                // We reached root, but not global root - continue with mount point path
                bpf_core_read(&dentry, sizeof(struct dentry *), &mnt_p->mnt_mountpoint);
                bpf_core_read(&mnt_p, sizeof(struct mount *), &mnt_p->mnt_parent);
                bpf_core_read(&mnt_parent_p, sizeof(struct mount *), &mnt_p->mnt_parent);
                vfsmnt = &mnt_p->mnt;
                continue;
            }
            // Global root - path fully parsed
            break;
        }
        // Add this dentry name to path
        d_name = get_d_name_from_dentry(dentry);
        // Name length plus terminator, masked to stay below MAX_STRING_SIZE
        len = (d_name.len + 1) & (MAX_STRING_SIZE - 1);
        off = buf_off - len;
        // Is string buffer big enough for dentry name?
        sz = 0;
        if (off <= buf_off) { // verify no wrap occurred
            len = len & ((MAX_PERCPU_BUFSIZE >> 1) - 1);
            sz = bpf_probe_read_kernel_str(
                &(out_buf->buf[off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)]), len, (void *) d_name.name);
        } else
            break;
        if (sz > 1) {
            buf_off -= 1; // remove null byte termination with slash sign
            bpf_probe_read_kernel(&(out_buf->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
            // Step back over the name itself (sz counts the terminator as well)
            buf_off -= sz - 1;
        } else {
            // If sz is 0 or 1 we have an error (path can't be null nor an empty string)
            break;
        }
        dentry = d_parent;
    }
    if (buf_off == (MAX_PERCPU_BUFSIZE >> 1)) {
        // memfd files have no path in the filesystem -> extract their name
        buf_off = 0;
        d_name = get_d_name_from_dentry(dentry);
        bpf_probe_read_kernel_str(&(out_buf->buf[0]), MAX_STRING_SIZE, (void *) d_name.name);
    } else {
        // Add leading slash
        buf_off -= 1;
        bpf_probe_read_kernel(&(out_buf->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
        // Null terminate the path string
        // (this overwrites the slash that was appended after the last path component)
        bpf_probe_read_kernel(&(out_buf->buf[(MAX_PERCPU_BUFSIZE >> 1) - 1]), 1, &zero);
    }
    return buf_off;
}

// Build the path string of `path` inside the buffer returned by
// get_buf(STRING_BUF_IDX) and return a pointer to its first character.
// Returns NULL when that buffer is not available.
statfunc void *get_path_str(struct path *path)
{
    // Get per-cpu string buffer
    buf_t *string_p = get_buf(STRING_BUF_IDX);
    if (string_p == NULL)
        return NULL;

    size_t buf_off = get_path_str_buf(path, string_p);
    // The offset is masked to the first half of the buffer before indexing
    return &string_p->buf[buf_off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)];
}

// Build the identifier of a file from its ctime, device and inode number.
// All fields are left zeroed when `file` is NULL.
statfunc file_id_t get_file_id(struct file *file)
{
    file_id_t file_id = {};
    if (file != NULL) {
        file_id.ctime = get_ctime_nanosec_from_file(file);
        file_id.device = get_dev_from_file(file);
        file_id.inode = get_inode_nr_from_file(file);
    }
    return file_id;
}

// get_path_str_cached - get the path of a specific file, using and updating cache map.
// The cache (io_file_path_cache_map) is keyed by the file id produced by
// get_file_id(). On a hit, a pointer to the cached path is returned. On a miss
// the path is built in the per-cpu string buffer, stored in the cache and a
// pointer into the string buffer is returned.
// Returns NULL when the string buffer is unavailable or when there is not
// enough room after the path offset for a whole path_buf_t.
statfunc void *get_path_str_cached(struct file *file)
{
    file_id_t file_id = get_file_id(file);
    path_buf_t *path = bpf_map_lookup_elem(&io_file_path_cache_map, &file_id);
    if (path == NULL) {
        // Get per-cpu string buffer
        buf_t *string_p = get_buf(STRING_BUF_IDX);
        if (string_p == NULL)
            return NULL;

        size_t buf_off = get_path_str_buf(__builtin_preserve_access_index(&file->f_path), string_p);
        // The cache entry is copied out of the string buffer starting at buf_off,
        // so a whole path_buf_t has to fit between that offset and the buffer end
        if (likely(sizeof(string_p->buf) > buf_off + sizeof(path_buf_t))) {
            path = (path_buf_t *) (&string_p->buf[buf_off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)]);
            bpf_map_update_elem(&io_file_path_cache_map, &file_id, path, BPF_ANY);
        } else {
            return NULL;
        }
    }
    return &path->buf;
}

// Build the path of a dentry inside the per-cpu string buffer and return a
// pointer to its first character, or NULL when the buffer is unavailable.
// Only d_parent links are followed (no mount information is consulted), and at
// most MAX_PATH_COMPONENTS components are visited. The string is assembled back
// to front, starting at offset MAX_PERCPU_BUFSIZE >> 1 of the buffer.
statfunc void *get_dentry_path_str(struct dentry *dentry)
{
    // Single-byte sources for the separator and the terminator written below
    char slash = '/';
    int zero = 0;

    // Writing starts in the middle of the buffer and proceeds towards offset 0
    u32 buf_off = (MAX_PERCPU_BUFSIZE >> 1);

    // Get per-cpu string buffer
    buf_t *string_p = get_buf(STRING_BUF_IDX);
    if (string_p == NULL)
        return NULL;

#pragma unroll
    for (int i = 0; i < MAX_PATH_COMPONENTS; i++) {
        struct dentry *d_parent = get_d_parent_ptr_from_dentry(dentry);
        // A dentry that is its own parent ends the walk
        if (dentry == d_parent) {
            break;
        }
        // Add this dentry name to path
        struct qstr d_name = get_d_name_from_dentry(dentry);
        // Name length plus terminator, masked to stay below MAX_STRING_SIZE
        unsigned int len = (d_name.len + 1) & (MAX_STRING_SIZE - 1);
        unsigned int off = buf_off - len;
        // Is string buffer big enough for dentry name?
        int sz = 0;
        if (off <= buf_off) { // verify no wrap occurred
            len = len & ((MAX_PERCPU_BUFSIZE >> 1) - 1);
            sz = bpf_probe_read_kernel_str(
                &(string_p->buf[off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)]), len, (void *) d_name.name);
        } else
            break;
        if (sz > 1) {
            buf_off -= 1; // remove null byte termination with slash sign
            bpf_probe_read_kernel(&(string_p->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
            // Step back over the name itself (sz counts the terminator as well)
            buf_off -= sz - 1;
        } else {
            // If sz is 0 or 1 we have an error (path can't be null nor an empty string)
            break;
        }
        dentry = d_parent;
    }

    if (buf_off == (MAX_PERCPU_BUFSIZE >> 1)) {
        // memfd files have no path in the filesystem -> extract their name
        buf_off = 0;
        struct qstr d_name = get_d_name_from_dentry(dentry);
        bpf_probe_read_kernel_str(&(string_p->buf[0]), MAX_STRING_SIZE, (void *) d_name.name);
    } else {
        // Add leading slash
        buf_off -= 1;
        bpf_probe_read_kernel(&(string_p->buf[buf_off & (MAX_PERCPU_BUFSIZE - 1)]), 1, &slash);
        // Null terminate the path string
        // (this overwrites the slash that was appended after the last path component)
        bpf_probe_read_kernel(&(string_p->buf[(MAX_PERCPU_BUFSIZE >> 1) - 1]), 1, &zero);
    }

    return &string_p->buf[buf_off];
}

// Collect the path string pointer and the file id of the given file.
// The result is left zeroed when `file` is NULL. pathname_p is whatever
// get_path_str() returns, i.e. a pointer into the per-cpu string buffer or NULL.
statfunc file_info_t get_file_info(struct file *file)
{
    file_info_t file_info = {};
    if (file != NULL) {
        file_info.pathname_p = get_path_str(__builtin_preserve_access_index(&file->f_path));
        file_info.id = get_file_id(file);
    }
    return file_info;
}

// Return file->f_inode.
statfunc struct inode *get_inode_from_file(struct file *file)
{
    return BPF_CORE_READ(file, f_inode);
}

// Return which of the standard FDs point to the given file as a bit field.
// The FDs matching bits are (1 << fd).
365 statfunc int get_standard_fds_from_struct_file(struct file *file) 366 { 367 struct task_struct *task = (struct task_struct *) bpf_get_current_task(); 368 if (task == NULL) { 369 return -1; 370 } 371 struct files_struct *files = (struct files_struct *) BPF_CORE_READ(task, files); 372 if (files == NULL) { 373 return -2; 374 } 375 struct file **fd = (struct file **) BPF_CORE_READ(files, fdt, fd); 376 if (fd == NULL) { 377 return -3; 378 } 379 380 int fds = 0; 381 #pragma unroll 382 for (int i = STDIN; i <= STDERR; i++) { 383 struct file *fd_file = NULL; 384 bpf_core_read(&fd_file, sizeof(struct file *), &fd[i]); 385 if (fd_file == file) { 386 fds |= 1 << i; 387 } 388 } 389 390 return fds; 391 } 392 393 statfunc struct super_block *get_super_block_from_inode(struct inode *f_inode) 394 { 395 return BPF_CORE_READ(f_inode, i_sb); 396 } 397 398 statfunc unsigned long get_s_magic_from_super_block(struct super_block *i_sb) 399 { 400 return BPF_CORE_READ(i_sb, s_magic); 401 } 402 403 // INTERNAL: STRUCTS BUILDING 404 // ----------------------------------------------------------------------- 405 406 statfunc void fill_vfs_file_metadata(struct file *file, u32 pid, u8 *metadata) 407 { 408 // Extract device id, inode number and mode 409 dev_t s_dev = get_dev_from_file(file); 410 unsigned long inode_nr = get_inode_nr_from_file(file); 411 unsigned short i_mode = get_inode_mode_from_file(file); 412 413 bpf_probe_read_kernel(metadata, 4, &s_dev); 414 bpf_probe_read_kernel(metadata + 4, 8, &inode_nr); 415 bpf_probe_read_kernel(metadata + 12, 4, &i_mode); 416 bpf_probe_read_kernel(metadata + 16, 4, &pid); 417 } 418 419 statfunc void fill_vfs_file_bin_args_io_data(io_data_t io_data, bin_args_t *bin_args) 420 { 421 bin_args->ptr = io_data.ptr; 422 bin_args->full_size = io_data.len; 423 424 // handle case of write using iovec 425 if (!io_data.is_buf && io_data.len > 0) { 426 bin_args->vec = io_data.ptr; 427 bin_args->iov_len = io_data.len; 428 bin_args->iov_idx = 0; 429 struct iovec 
io_vec; 430 bpf_probe_read_kernel(&io_vec, sizeof(struct iovec), &bin_args->vec[0]); 431 bin_args->ptr = io_vec.iov_base; 432 bin_args->full_size = io_vec.iov_len; 433 } 434 } 435 436 // Fill given bin_args_t argument with all needed information for vfs_file binary sending 437 statfunc void fill_vfs_file_bin_args(u32 type, 438 struct file *file, 439 loff_t *pos, 440 io_data_t io_data, 441 size_t write_bytes, 442 int pid, 443 bin_args_t *bin_args) 444 { 445 off_t start_pos; 446 447 bpf_probe_read_kernel(&start_pos, sizeof(off_t), pos); 448 449 // Calculate write start offset 450 if (start_pos != 0) 451 start_pos -= write_bytes; 452 453 bin_args->type = type; 454 fill_vfs_file_metadata(file, pid, &bin_args->metadata[0]); 455 bin_args->start_off = start_pos; 456 fill_vfs_file_bin_args_io_data(io_data, bin_args); 457 } 458 459 statfunc void fill_file_header(u8 header[FILE_MAGIC_HDR_SIZE], io_data_t io_data) 460 { 461 u32 len = (u32) io_data.len; 462 if (io_data.is_buf) { 463 // inline bounds check to force compiler to use the register of len 464 asm volatile("if %[size] < %[max_size] goto +1;\n" 465 "%[size] = %[max_size];\n" 466 : 467 : [size] "r"(len), [max_size] "i"(FILE_MAGIC_HDR_SIZE)); 468 bpf_probe_read(header, len, io_data.ptr); 469 } else { 470 struct iovec io_vec; 471 __builtin_memset(&io_vec, 0, sizeof(io_vec)); 472 bpf_probe_read(&io_vec, sizeof(struct iovec), io_data.ptr); 473 // inline bounds check to force compiler to use the register of len 474 asm volatile("if %[size] < %[max_size] goto +1;\n" 475 "%[size] = %[max_size];\n" 476 : 477 : [size] "r"(len), [max_size] "i"(FILE_MAGIC_HDR_SIZE)); 478 bpf_probe_read(header, len, io_vec.iov_base); 479 } 480 } 481 482 #endif