github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/c/headers/common/hash.h (about)

     1  #ifndef __COMMON_HASH_H__
     2  
     3  #define __COMMON_HASH_H__
     4  
     5  #include "bpf/bpf_endian.h"
     6  #include <vmlinux.h>
     7  
     8  #include <maps.h>
     9  #include <common/logging.h>
    10  #include <common/common.h>
    11  
// Initial hash state used by murmur32(). Userland hashes with the same seed, so the
// value must stay in sync on both sides.
#define MURMUR_SEED ((u32) 0x18273645) // same as in userland
    13  
    14  // PROTOTYPES
    15  
    16  u32 murmur32(const void *, u32);
    17  u32 hash_u32_and_u64(u32, u64);
    18  
    19  // FUNCTIONS
    20  
// MurmurHash3 x86 32-bit (https://en.wikipedia.org/wiki/MurmurHash): small (u32), simple (for C
// and Go), high-performance, optimized and collision-resistant hashing function. This function is
// used to hash a task's unique identifier (task pid + task_start_time). Userland uses this unique
// identifier to identify a task and construct the process tree.
    25  
    26  // Murmur3 32-bit hash function implementation.
    27  
    28  u32 murmur32(const void *key, u32 len)
    29  {
    30      const u8 *data = (const u8 *) key;
    31      const int nblocks = len / 4;
    32  
    33      u32 h1 = MURMUR_SEED;
    34      u32 c1 = 0xcc9e2d51;
    35      u32 c2 = 0x1b873593;
    36  
    37      // Body
    38      const u32 *blocks = (const u32 *) (data + nblocks * 4);
    39  
    40      for (int i = -nblocks; i; i++) {
    41          u32 k1 = blocks[i];
    42          k1 *= c1;
    43          k1 = (k1 << 15) | (k1 >> 17);
    44          k1 *= c2;
    45  
    46          h1 ^= k1;
    47          h1 = (h1 << 13) | (h1 >> 19);
    48          h1 = h1 * 5 + 0xe6546b64;
    49      }
    50  
    51      // Tail
    52      const u8 *tail = (const u8 *) (data + nblocks * 4);
    53      u32 k1 = 0;
    54  
    55      switch (len & 3) {
    56          case 3:
    57              k1 ^= tail[2] << 16;
    58          case 2:
    59              k1 ^= tail[1] << 8;
    60          case 1:
    61              k1 ^= tail[0];
    62              k1 *= c1;
    63              k1 = (k1 << 15) | (k1 >> 17);
    64              k1 *= c2;
    65              h1 ^= k1;
    66      };
    67  
    68      // Final
    69      h1 ^= len;
    70      h1 ^= h1 >> 16;
    71      h1 *= 0x85ebca6b;
    72      h1 ^= h1 >> 13;
    73      h1 *= 0xc2b2ae35;
    74      h1 ^= h1 >> 16;
    75  
    76      return h1;
    77  }
    78  
    79  // Hash a u32 and a u64 into a u32. This function is used to hash a task unique identifier.
    80  // Identical to Golang (userland) HashU32AndU64 function: same hash for same input.
    81  
    82  u32 hash_u32_and_u64(u32 arg1, u64 arg2)
    83  {
    84      uint8_t buffer[sizeof(arg1) + sizeof(arg2)];
    85  
    86  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    87      arg1 = __builtin_bswap32(arg1); // network byte order is big endian, convert for ...
    88      arg2 = __builtin_bswap64(arg2); // ... consistent hashing among different endianness.
    89  #endif
    90  
    91      __builtin_memcpy(buffer, &arg1, sizeof(arg1));
    92      __builtin_memcpy(buffer + sizeof(arg1), &arg2, sizeof(arg2));
    93  
    94      return murmur32(buffer, 4 + 8); // 4 + 8 = sizeof(u32) + sizeof(u64)
    95  }
    96  
    97  // hash_task_id is a wrapper, around HashU32AndU64, that rounds up the timestamp argument to the
    98  // precision userland will obtain from the procfs (since start_time is measured in clock ticks).
    99  // This is needed so the process tree can be updated by procfs readings as well. The userland
   100  // precision is defined by USER_HZ, which is 100HZ in almost all cases (untrue for embedded systems
   101  
   102  u32 hash_task_id(u32 arg1, u64 arg2)
   103  {
   104      u64 round = arg2 / 10000000LL; // (1000000000 / USER_HZ) = 10000000
   105      round *= 10000000LL;
   106      return hash_u32_and_u64(arg1, round);
   107  }
   108  
   109  #endif