github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/compress/libdeflate/unaligned.h (about)

     1  /*
     2   * unaligned.h - inline functions for unaligned memory accesses
     3   */
     4  
     5  #ifndef LIB_UNALIGNED_H
     6  #define LIB_UNALIGNED_H
     7  
     8  #include "lib_common.h"
     9  
    10  /*
    11   * Naming note:
    12   *
    13   * {load,store}_*_unaligned() deal with raw bytes without endianness conversion.
    14   * {get,put}_unaligned_*() deal with a specific endianness.
    15   */
    16  
/*
 * Instantiate the raw (no endianness conversion) unaligned load/store helpers
 * for each integer width: load_uN_unaligned() / store_uN_unaligned().
 * DEFINE_UNALIGNED_TYPE is provided by lib_common.h.
 */
DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
DEFINE_UNALIGNED_TYPE(machine_word_t)

/* Convenience aliases for the native machine-word width. */
#define load_word_unaligned	load_machine_word_t_unaligned
#define store_word_unaligned	store_machine_word_t_unaligned
    24  
    25  /***** Unaligned loads  *****/
    26  
    27  static forceinline u16
    28  get_unaligned_le16(const u8 *p)
    29  {
    30  	if (UNALIGNED_ACCESS_IS_FAST)
    31  		return le16_bswap(load_u16_unaligned(p));
    32  	else
    33  		return ((u16)p[1] << 8) | p[0];
    34  }
    35  
    36  static forceinline u16
    37  get_unaligned_be16(const u8 *p)
    38  {
    39  	if (UNALIGNED_ACCESS_IS_FAST)
    40  		return be16_bswap(load_u16_unaligned(p));
    41  	else
    42  		return ((u16)p[0] << 8) | p[1];
    43  }
    44  
    45  static forceinline u32
    46  get_unaligned_le32(const u8 *p)
    47  {
    48  	if (UNALIGNED_ACCESS_IS_FAST)
    49  		return le32_bswap(load_u32_unaligned(p));
    50  	else
    51  		return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
    52  			((u32)p[1] << 8) | p[0];
    53  }
    54  
    55  static forceinline u32
    56  get_unaligned_be32(const u8 *p)
    57  {
    58  	if (UNALIGNED_ACCESS_IS_FAST)
    59  		return be32_bswap(load_u32_unaligned(p));
    60  	else
    61  		return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
    62  			((u32)p[2] << 8) | p[3];
    63  }
    64  
    65  static forceinline u64
    66  get_unaligned_le64(const u8 *p)
    67  {
    68  	if (UNALIGNED_ACCESS_IS_FAST)
    69  		return le64_bswap(load_u64_unaligned(p));
    70  	else
    71  		return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
    72  			((u64)p[5] << 40) | ((u64)p[4] << 32) |
    73  			((u64)p[3] << 24) | ((u64)p[2] << 16) |
    74  			((u64)p[1] << 8) | p[0];
    75  }
    76  
    77  static forceinline machine_word_t
    78  get_unaligned_leword(const u8 *p)
    79  {
    80  	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
    81  	if (WORDBITS == 32)
    82  		return get_unaligned_le32(p);
    83  	else
    84  		return get_unaligned_le64(p);
    85  }
    86  
    87  /***** Unaligned stores  *****/
    88  
    89  static forceinline void
    90  put_unaligned_le16(u16 v, u8 *p)
    91  {
    92  	if (UNALIGNED_ACCESS_IS_FAST) {
    93  		store_u16_unaligned(le16_bswap(v), p);
    94  	} else {
    95  		p[0] = (u8)(v >> 0);
    96  		p[1] = (u8)(v >> 8);
    97  	}
    98  }
    99  
   100  static forceinline void
   101  put_unaligned_be16(u16 v, u8 *p)
   102  {
   103  	if (UNALIGNED_ACCESS_IS_FAST) {
   104  		store_u16_unaligned(be16_bswap(v), p);
   105  	} else {
   106  		p[0] = (u8)(v >> 8);
   107  		p[1] = (u8)(v >> 0);
   108  	}
   109  }
   110  
   111  static forceinline void
   112  put_unaligned_le32(u32 v, u8 *p)
   113  {
   114  	if (UNALIGNED_ACCESS_IS_FAST) {
   115  		store_u32_unaligned(le32_bswap(v), p);
   116  	} else {
   117  		p[0] = (u8)(v >> 0);
   118  		p[1] = (u8)(v >> 8);
   119  		p[2] = (u8)(v >> 16);
   120  		p[3] = (u8)(v >> 24);
   121  	}
   122  }
   123  
   124  static forceinline void
   125  put_unaligned_be32(u32 v, u8 *p)
   126  {
   127  	if (UNALIGNED_ACCESS_IS_FAST) {
   128  		store_u32_unaligned(be32_bswap(v), p);
   129  	} else {
   130  		p[0] = (u8)(v >> 24);
   131  		p[1] = (u8)(v >> 16);
   132  		p[2] = (u8)(v >> 8);
   133  		p[3] = (u8)(v >> 0);
   134  	}
   135  }
   136  
   137  static forceinline void
   138  put_unaligned_le64(u64 v, u8 *p)
   139  {
   140  	if (UNALIGNED_ACCESS_IS_FAST) {
   141  		store_u64_unaligned(le64_bswap(v), p);
   142  	} else {
   143  		p[0] = (u8)(v >> 0);
   144  		p[1] = (u8)(v >> 8);
   145  		p[2] = (u8)(v >> 16);
   146  		p[3] = (u8)(v >> 24);
   147  		p[4] = (u8)(v >> 32);
   148  		p[5] = (u8)(v >> 40);
   149  		p[6] = (u8)(v >> 48);
   150  		p[7] = (u8)(v >> 56);
   151  	}
   152  }
   153  
   154  static forceinline void
   155  put_unaligned_leword(machine_word_t v, u8 *p)
   156  {
   157  	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
   158  	if (WORDBITS == 32)
   159  		put_unaligned_le32(v, p);
   160  	else
   161  		put_unaligned_le64(v, p);
   162  }
   163  
   164  /***** 24-bit loads *****/
   165  
   166  /*
   167   * Given a 32-bit value that was loaded with the platform's native endianness,
   168   * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
   169   * bits contain the first 3 bytes, arranged in octets in a platform-dependent
   170   * order, at the memory location from which the input 32-bit value was loaded.
   171   */
   172  static forceinline u32
   173  loaded_u32_to_u24(u32 v)
   174  {
   175  	if (CPU_IS_LITTLE_ENDIAN())
   176  		return v & 0xFFFFFF;
   177  	else
   178  		return v >> 8;
   179  }
   180  
   181  /*
   182   * Load the next 3 bytes from the memory location @p into the 24 low-order bits
   183   * of a 32-bit value.  The order in which the 3 bytes will be arranged as octets
   184   * in the 24 bits is platform-dependent.  At least LOAD_U24_REQUIRED_NBYTES
   185   * bytes must be available at @p; note that this may be more than 3.
   186   */
   187  static forceinline u32
   188  load_u24_unaligned(const u8 *p)
   189  {
   190  #if UNALIGNED_ACCESS_IS_FAST
   191  #  define LOAD_U24_REQUIRED_NBYTES 4
   192  	return loaded_u32_to_u24(load_u32_unaligned(p));
   193  #else
   194  #  define LOAD_U24_REQUIRED_NBYTES 3
   195  	if (CPU_IS_LITTLE_ENDIAN())
   196  		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
   197  	else
   198  		return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
   199  #endif
   200  }
   201  
   202  #endif /* LIB_UNALIGNED_H */