github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/c/headers/common/buffer.h (about)

     1  #ifndef __COMMON_BUFFER_H__
     2  #define __COMMON_BUFFER_H__
     3  
     4  #include <vmlinux.h>
     5  
     6  #include <common/context.h>
     7  #include <common/hash.h>
     8  #include <common/network.h>
     9  
    10  // PROTOTYPES
    11  
    12  statfunc buf_t *get_buf(int);
    13  statfunc int save_to_submit_buf(args_buffer_t *, void *, u32, u8);
    14  statfunc int save_bytes_to_buf(args_buffer_t *, void *, u32, u8);
    15  statfunc int save_str_to_buf(args_buffer_t *, void *, u8);
    16  statfunc int add_u64_elements_to_buf(args_buffer_t *, const u64 __user *, int, volatile u32);
    17  statfunc int save_u64_arr_to_buf(args_buffer_t *, const u64 __user *, int, u8);
    18  statfunc int save_str_arr_to_buf(args_buffer_t *, const char __user *const __user *, u8);
    19  statfunc int save_args_str_arr_to_buf(args_buffer_t *, const char *, const char *, int, u8);
    20  statfunc int save_sockaddr_to_buf(args_buffer_t *, struct socket *, u8);
    21  statfunc int save_args_to_submit_buf(event_data_t *, args_t *);
    22  statfunc int events_perf_submit(program_data_t *, u32 id, long);
    23  statfunc int signal_perf_submit(void *, controlplane_signal_t *sig, u32 id);
    24  
    25  // FUNCTIONS
    26  
    27  statfunc buf_t *get_buf(int idx)
    28  {
    29      return bpf_map_lookup_elem(&bufs, &idx);
    30  }
    31  
    32  // biggest elem to be saved with 'save_to_submit_buf' should be defined here:
    33  #define MAX_ELEMENT_SIZE sizeof(struct sockaddr_un)
    34  
    35  statfunc int save_to_submit_buf(args_buffer_t *buf, void *ptr, u32 size, u8 index)
    36  {
    37      // Data saved to submit buf: [index][ ... buffer[size] ... ]
    38  
    39      if (size == 0)
    40          return 0;
    41  
    42      barrier();
    43      if (buf->offset > ARGS_BUF_SIZE - 1)
    44          return 0;
    45  
    46      // Save argument index
    47      buf->args[buf->offset] = index;
    48  
    49      // Satisfy verifier
    50      if (buf->offset > ARGS_BUF_SIZE - (MAX_ELEMENT_SIZE + 1))
    51          return 0;
    52  
    53      // Read into buffer
    54      if (bpf_probe_read(&(buf->args[buf->offset + 1]), size, ptr) == 0) {
    55          // We update offset only if all writes were successful
    56          buf->offset += size + 1;
    57          buf->argnum++;
    58          return 1;
    59      }
    60  
    61      return 0;
    62  }
    63  
    64  statfunc int save_to_submit_buf_kernel(args_buffer_t *buf, void *ptr, u32 size, u8 index)
    65  {
    66      // Data saved to submit buf: [index][ ... buffer[size] ... ]
    67  
    68      if (size == 0)
    69          return 0;
    70  
    71      barrier();
    72      if (buf->offset > ARGS_BUF_SIZE - 1)
    73          return 0;
    74  
    75      // Save argument index
    76      buf->args[buf->offset] = index;
    77  
    78      // Satisfy verifier
    79      if (buf->offset > ARGS_BUF_SIZE - (MAX_ELEMENT_SIZE + 1))
    80          return 0;
    81  
    82      // Read into buffer
    83      if (bpf_probe_read_kernel(&(buf->args[buf->offset + 1]), size, ptr) == 0) {
    84          // We update offset only if all writes were successful
    85          buf->offset += size + 1;
    86          buf->argnum++;
    87          return 1;
    88      }
    89  
    90      return 0;
    91  }
    92  
    93  statfunc int save_bytes_to_buf(args_buffer_t *buf, void *ptr, u32 size, u8 index)
    94  {
    95      // Data saved to submit buf: [index][size][ ... bytes ... ]
    96  
    97      if (size == 0)
    98          return 0;
    99  
   100      if (buf->offset > ARGS_BUF_SIZE - 1)
   101          return 0;
   102  
   103      // Save argument index
   104      buf->args[buf->offset] = index;
   105  
   106      if (buf->offset > ARGS_BUF_SIZE - (sizeof(int) + 1))
   107          return 0;
   108  
   109      // Save size to buffer
   110      if (bpf_probe_read(&(buf->args[buf->offset + 1]), sizeof(int), &size) != 0) {
   111          return 0;
   112      }
   113  
   114      if (buf->offset > ARGS_BUF_SIZE - (MAX_BYTES_ARR_SIZE + 1 + sizeof(int)))
   115          return 0;
   116  
   117      // Read bytes into buffer
   118      if (bpf_probe_read(&(buf->args[buf->offset + 1 + sizeof(int)]),
   119                         size & (MAX_BYTES_ARR_SIZE - 1),
   120                         ptr) == 0) {
   121          // We update offset only if all writes were successful
   122          buf->offset += size + 1 + sizeof(int);
   123          buf->argnum++;
   124          return 1;
   125      }
   126  
   127      return 0;
   128  }
   129  
   130  statfunc int save_str_to_buf(args_buffer_t *buf, void *ptr, u8 index)
   131  {
   132      // Data saved to submit buf: [index][size][ ... string ... ]
   133  
   134      if (buf->offset > ARGS_BUF_SIZE - 1)
   135          return 0;
   136  
   137      // Save argument index
   138      buf->args[buf->offset] = index;
   139  
   140      // Satisfy verifier for probe read
   141      if (buf->offset > ARGS_BUF_SIZE - (MAX_STRING_SIZE + 1 + sizeof(int)))
   142          return 0;
   143  
   144      // Read into buffer
   145      int sz = bpf_probe_read_str(&(buf->args[buf->offset + 1 + sizeof(int)]), MAX_STRING_SIZE, ptr);
   146      if (sz > 0) {
   147          barrier();
   148          // Satisfy verifier for probe read
   149          if (buf->offset > ARGS_BUF_SIZE - (MAX_STRING_SIZE + 1 + sizeof(int)))
   150              return 0;
   151  
   152          __builtin_memcpy(&(buf->args[buf->offset + 1]), &sz, sizeof(int));
   153          buf->offset += sz + sizeof(int) + 1;
   154          buf->argnum++;
   155          return 1;
   156      }
   157  
   158      return 0;
   159  }
   160  
   161  statfunc int
   162  add_u64_elements_to_buf(args_buffer_t *buf, const u64 __user *ptr, int len, volatile u32 count_off)
   163  {
   164      // save count_off into a new variable to avoid verifier errors
   165      u32 off = count_off;
   166      u8 elem_num = 0;
   167  #pragma unroll
   168      for (int i = 0; i < len; i++) {
   169          void *addr = &(buf->args[buf->offset]);
   170          if (buf->offset > ARGS_BUF_SIZE - sizeof(u64))
   171              // not enough space - return
   172              goto out;
   173          if (bpf_probe_read(addr, sizeof(u64), (void *) &ptr[i]) != 0)
   174              goto out;
   175          elem_num++;
   176          buf->offset += sizeof(u64);
   177      }
   178  out:
   179      // save number of elements in the array
   180      if (off > (ARGS_BUF_SIZE - 1))
   181          return 0;
   182  
   183      u8 current_elem_num = buf->args[off];
   184      buf->args[off] = current_elem_num + elem_num;
   185  
   186      return 1;
   187  }
   188  
   189  statfunc int save_u64_arr_to_buf(args_buffer_t *buf, const u64 *ptr, int len, u8 index)
   190  {
   191      // Data saved to submit buf: [index][u16 count][u64 1][u64 2][u64 3]...
   192      u16 restricted_len = (u16) len;
   193      u32 total_size = sizeof(u64) * restricted_len;
   194  
   195      if (buf->offset > ARGS_BUF_SIZE - 1)
   196          return 0;
   197  
   198      // Save argument index
   199      buf->args[buf->offset] = index;
   200  
   201      // Save number of elements
   202      if (buf->offset + sizeof(index) > ARGS_BUF_SIZE - sizeof(restricted_len))
   203          return 0;
   204      __builtin_memcpy(
   205          &(buf->args[buf->offset + sizeof(index)]), &restricted_len, sizeof(restricted_len));
   206  
   207      if ((buf->offset + sizeof(index) + sizeof(restricted_len) > ARGS_BUF_SIZE - MAX_BYTES_ARR_SIZE))
   208          return 0;
   209  
   210      if (bpf_probe_read(&(buf->args[buf->offset + sizeof(index) + sizeof(restricted_len)]),
   211                         total_size & (MAX_BYTES_ARR_SIZE - 1),
   212                         (void *) ptr) != 0)
   213          return 0;
   214  
   215      buf->argnum++;
   216      buf->offset += sizeof(index) + sizeof(restricted_len) + total_size;
   217  
   218      return 1;
   219  }
   220  
   221  statfunc int save_str_arr_to_buf(args_buffer_t *buf, const char __user *const __user *ptr, u8 index)
   222  {
   223      // Data saved to submit buf: [index][string count][str1 size][str1][str2 size][str2]...
   224  
   225      u8 elem_num = 0;
   226  
   227      if (buf->offset > ARGS_BUF_SIZE - 1)
   228          return 0;
   229  
   230      // Save argument index
   231      buf->args[buf->offset] = index;
   232  
   233      // Save space for number of elements (1 byte)
   234      u32 orig_off = buf->offset + 1;
   235      buf->offset += 2;
   236  
   237  #pragma unroll
   238      for (int i = 0; i < MAX_STR_ARR_ELEM; i++) {
   239          const char *argp = NULL;
   240          bpf_probe_read(&argp, sizeof(argp), &ptr[i]);
   241          if (!argp)
   242              goto out;
   243  
   244          if (buf->offset > ARGS_BUF_SIZE - MAX_STRING_SIZE - sizeof(int))
   245              // not enough space - return
   246              goto out;
   247  
   248          // Read into buffer
   249          int sz = bpf_probe_read_str(&(buf->args[buf->offset + sizeof(int)]), MAX_STRING_SIZE, argp);
   250          if (sz > 0) {
   251              if (buf->offset > ARGS_BUF_SIZE - sizeof(int))
   252                  // Satisfy validator
   253                  goto out;
   254              bpf_probe_read(&(buf->args[buf->offset]), sizeof(int), &sz);
   255              buf->offset += sz + sizeof(int);
   256              elem_num++;
   257              continue;
   258          } else {
   259              goto out;
   260          }
   261      }
   262      // handle truncated argument list
   263      char ellipsis[] = "...";
   264      if (buf->offset > ARGS_BUF_SIZE - MAX_STRING_SIZE - sizeof(int))
   265          // not enough space - return
   266          goto out;
   267  
   268      // Read into buffer
   269      int sz = bpf_probe_read_str(&(buf->args[buf->offset + sizeof(int)]), MAX_STRING_SIZE, ellipsis);
   270      if (sz > 0) {
   271          if (buf->offset > ARGS_BUF_SIZE - sizeof(int))
   272              // Satisfy validator
   273              goto out;
   274          bpf_probe_read(&(buf->args[buf->offset]), sizeof(int), &sz);
   275          buf->offset += sz + sizeof(int);
   276          elem_num++;
   277      }
   278  out:
   279      // save number of elements in the array
   280      if (orig_off > ARGS_BUF_SIZE - 1)
   281          return 0;
   282      buf->args[orig_off] = elem_num;
   283      buf->argnum++;
   284      return 1;
   285  }
   286  
   287  #define MAX_ARR_LEN 8192
   288  
   289  statfunc int save_args_str_arr_to_buf(
   290      args_buffer_t *buf, const char *start, const char *end, int elem_num, u8 index)
   291  {
   292      // Data saved to submit buf: [index][len][arg_len][arg #][null delimited string array]
   293      // Note: This helper saves null (0x00) delimited string array into buf
   294  
   295      if (start >= end)
   296          return 0;
   297  
   298      int len = end - start;
   299      if (len > (MAX_ARR_LEN - 1))
   300          len = MAX_ARR_LEN - 1;
   301  
   302      // Save argument index
   303      if (buf->offset > ARGS_BUF_SIZE - 1)
   304          return 0;
   305      buf->args[buf->offset] = index;
   306  
   307      // Satisfy validator for probe read
   308      if ((buf->offset + 1) > ARGS_BUF_SIZE - sizeof(int))
   309          return 0;
   310  
   311      // Save array length
   312      bpf_probe_read(&(buf->args[buf->offset + 1]), sizeof(int), &len);
   313  
   314      // Satisfy validator for probe read
   315      if ((buf->offset + 5) > ARGS_BUF_SIZE - sizeof(int))
   316          return 0;
   317  
   318      // Save number of arguments
   319      bpf_probe_read(&(buf->args[buf->offset + 5]), sizeof(int), &elem_num);
   320  
   321      // Satisfy validator for probe read
   322      if ((buf->offset + 9) > ARGS_BUF_SIZE - MAX_ARR_LEN)
   323          return 0;
   324  
   325      // Read into buffer
   326      if (bpf_probe_read(&(buf->args[buf->offset + 9]), len & (MAX_ARR_LEN - 1), start) == 0) {
   327          // We update offset only if all writes were successful
   328          buf->offset += len + 9;
   329          buf->argnum++;
   330          return 1;
   331      }
   332  
   333      return 0;
   334  }
   335  
   336  statfunc int save_sockaddr_to_buf(args_buffer_t *buf, struct socket *sock, u8 index)
   337  {
   338      struct sock *sk = get_socket_sock(sock);
   339  
   340      u16 family = get_sock_family(sk);
   341      if ((family != AF_INET) && (family != AF_INET6) && (family != AF_UNIX)) {
   342          return 0;
   343      }
   344  
   345      if (family == AF_INET) {
   346          net_conn_v4_t net_details = {};
   347          struct sockaddr_in local;
   348  
   349          get_network_details_from_sock_v4(sk, &net_details, 0);
   350          get_local_sockaddr_in_from_network_details(&local, &net_details, family);
   351  
   352          save_to_submit_buf(buf, (void *) &local, sizeof(struct sockaddr_in), index);
   353      } else if (family == AF_INET6) {
   354          net_conn_v6_t net_details = {};
   355          struct sockaddr_in6 local;
   356  
   357          get_network_details_from_sock_v6(sk, &net_details, 0);
   358          get_local_sockaddr_in6_from_network_details(&local, &net_details, family);
   359  
   360          save_to_submit_buf(buf, (void *) &local, sizeof(struct sockaddr_in6), index);
   361      } else if (family == AF_UNIX) {
   362          struct unix_sock *unix_sk = (struct unix_sock *) sk;
   363          struct sockaddr_un sockaddr = get_unix_sock_addr(unix_sk);
   364          save_to_submit_buf(buf, (void *) &sockaddr, sizeof(struct sockaddr_un), index);
   365      }
   366      return 0;
   367  }
   368  
   369  #define DEC_ARG(n, enc_arg) ((enc_arg >> (8 * n)) & 0xFF)
   370  
   371  statfunc int save_args_to_submit_buf(event_data_t *event, args_t *args)
   372  {
   373      unsigned int i;
   374      unsigned int rc = 0;
   375      unsigned int arg_num = 0;
   376      short family = 0;
   377  
   378      if (event->param_types == 0)
   379          return 0;
   380  
   381  #pragma unroll
   382      for (i = 0; i < 6; i++) {
   383          int size = 0;
   384          u8 type = DEC_ARG(i, event->param_types);
   385          u8 index = i;
   386          switch (type) {
   387              case NONE_T:
   388                  break;
   389              case INT_T:
   390                  size = sizeof(int);
   391                  break;
   392              case UINT_T:
   393                  size = sizeof(unsigned int);
   394                  break;
   395              case OFF_T_T:
   396                  size = sizeof(off_t);
   397                  break;
   398              case DEV_T_T:
   399                  size = sizeof(dev_t);
   400                  break;
   401              case MODE_T_T:
   402                  size = sizeof(mode_t);
   403                  break;
   404              case LONG_T:
   405                  size = sizeof(long);
   406                  break;
   407              case ULONG_T:
   408                  size = sizeof(unsigned long);
   409                  break;
   410              case SIZE_T_T:
   411                  size = sizeof(size_t);
   412                  break;
   413              case POINTER_T:
   414                  size = sizeof(void *);
   415                  break;
   416              case U8_T:
   417                  size = sizeof(u8);
   418                  break;
   419              case U16_T:
   420                  size = sizeof(u16);
   421                  break;
   422              case STR_T:
   423                  rc = save_str_to_buf(&(event->args_buf), (void *) args->args[i], index);
   424                  break;
   425              case SOCKADDR_T:
   426                  if (args->args[i]) {
   427                      bpf_probe_read(&family, sizeof(short), (void *) args->args[i]);
   428                      switch (family) {
   429                          case AF_UNIX:
   430                              size = sizeof(struct sockaddr_un);
   431                              break;
   432                          case AF_INET:
   433                              size = sizeof(struct sockaddr_in);
   434                              break;
   435                          case AF_INET6:
   436                              size = sizeof(struct sockaddr_in6);
   437                              break;
   438                          default:
   439                              size = sizeof(short);
   440                      }
   441                      rc = save_to_submit_buf(
   442                          &(event->args_buf), (void *) (args->args[i]), size, index);
   443                  } else {
   444                      rc = save_to_submit_buf(&(event->args_buf), &family, sizeof(short), index);
   445                  }
   446                  break;
   447              case INT_ARR_2_T:
   448                  size = sizeof(int[2]);
   449                  rc = save_to_submit_buf(&(event->args_buf), (void *) (args->args[i]), size, index);
   450                  break;
   451              case TIMESPEC_T:
   452                  size = sizeof(struct __kernel_timespec);
   453                  rc = save_to_submit_buf(&(event->args_buf), (void *) (args->args[i]), size, index);
   454                  break;
   455          }
   456          switch (type) {
   457              case NONE_T:
   458              case STR_T:
   459              case SOCKADDR_T:
   460              case INT_ARR_2_T:
   461              case TIMESPEC_T:
   462                  break;
   463              default:
   464                  rc = save_to_submit_buf(&(event->args_buf), (void *) &(args->args[i]), size, index);
   465                  break;
   466          }
   467          if (rc > 0) {
   468              arg_num++;
   469              rc = 0;
   470          }
   471      }
   472  
   473      return arg_num;
   474  }
   475  
   476  statfunc int events_perf_submit(program_data_t *p, u32 id, long ret)
   477  {
   478      p->event->context.eventid = id;
   479      p->event->context.retval = ret;
   480  
   481      if (p->event->context.task.tid == 0) {
   482          init_task_context(&p->event->context.task, p->task, p->config->options);
   483          // keep task_info updated
   484          bpf_probe_read_kernel(&p->task_info->context, sizeof(task_context_t), &p->event->context.task);
   485      }
   486  
   487      // Get Stack trace
   488      if (p->config->options & OPT_CAPTURE_STACK_TRACES) {
   489          int stack_id = bpf_get_stackid(p->ctx, &stack_addresses, BPF_F_USER_STACK);
   490          if (stack_id >= 0) {
   491              p->event->context.stack_id = stack_id;
   492          }
   493      }
   494  
   495      u32 size = sizeof(event_context_t) + sizeof(u8) +
   496                 p->event->args_buf.offset; // context + argnum + arg buffer size
   497  
   498      // inline bounds check to force compiler to use the register of size
   499      asm volatile("if %[size] < %[max_size] goto +1;\n"
   500                   "%[size] = %[max_size];\n"
   501                   :
   502                   : [size] "r"(size), [max_size] "i"(MAX_EVENT_SIZE));
   503  
   504      return bpf_perf_event_output(p->ctx, &events, BPF_F_CURRENT_CPU, p->event, size);
   505  }
   506  
   507  statfunc event_data_t *init_netflows_event_data()
   508  {
   509      int zero = 0;
   510      event_data_t *e = bpf_map_lookup_elem(&netflows_data_map, &zero);
   511      if (unlikely(e == NULL))
   512          return NULL;
   513  
   514      e->context.ts = bpf_ktime_get_ns();
   515      e->args_buf.argnum = 0;
   516      e->args_buf.offset = 0;
   517      return e;
   518  }
   519  
   520  statfunc int net_events_perf_submit(void *ctx, u32 id, event_data_t *event)
   521  {
   522      event->context.eventid = id;
   523  
   524      u32 size = sizeof(event_context_t) + sizeof(u8) +
   525                 event->args_buf.offset; // context + argnum + arg buffer size
   526  
   527      // inline bounds check to force compiler to use the register of size
   528      asm volatile("if %[size] < %[max_size] goto +1;\n"
   529                   "%[size] = %[max_size];\n"
   530                   :
   531                   : [size] "r"(size), [max_size] "i"(MAX_EVENT_SIZE));
   532  
   533      return bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, event, size);
   534  }
   535  
   536  statfunc int signal_perf_submit(void *ctx, controlplane_signal_t *sig, u32 id)
   537  {
   538      sig->event_id = id;
   539  
   540      u32 size =
   541          sizeof(u32) + sizeof(u8) + sig->args_buf.offset; // signal id + argnum + arg buffer size
   542  
   543      // inline bounds check to force compiler to use the register of size
   544      asm volatile("if %[size] < %[max_size] goto +1;\n"
   545                   "%[size] = %[max_size];\n"
   546                   :
   547                   : [size] "r"(size), [max_size] "i"(MAX_SIGNAL_SIZE));
   548  
   549      return bpf_perf_event_output(ctx, &signals, BPF_F_CURRENT_CPU, sig, size);
   550  }
   551  
   552  #endif