github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/compress/libdeflate/unaligned.h

/*
 * unaligned.h - inline functions for unaligned memory accesses
 */

#ifndef LIB_UNALIGNED_H
#define LIB_UNALIGNED_H

#include "lib_common.h"

/*
 * Naming note:
 *
 * {load,store}_*_unaligned() deal with raw bytes without endianness conversion.
 * {get,put}_unaligned_*() deal with a specific endianness.
 */

DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
DEFINE_UNALIGNED_TYPE(machine_word_t)

#define load_word_unaligned	load_machine_word_t_unaligned
#define store_word_unaligned	store_machine_word_t_unaligned

/***** Unaligned loads *****/

static forceinline u16
get_unaligned_le16(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return le16_bswap(load_u16_unaligned(p));
	else
		return ((u16)p[1] << 8) | p[0];
}

static forceinline u16
get_unaligned_be16(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return be16_bswap(load_u16_unaligned(p));
	else
		return ((u16)p[0] << 8) | p[1];
}

static forceinline u32
get_unaligned_le32(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return le32_bswap(load_u32_unaligned(p));
	else
		return ((u32)p[3] << 24) | ((u32)p[2] << 16) |
		       ((u32)p[1] << 8) | p[0];
}

static forceinline u32
get_unaligned_be32(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return be32_bswap(load_u32_unaligned(p));
	else
		return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
		       ((u32)p[2] << 8) | p[3];
}

static forceinline u64
get_unaligned_le64(const u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST)
		return le64_bswap(load_u64_unaligned(p));
	else
		return ((u64)p[7] << 56) | ((u64)p[6] << 48) |
		       ((u64)p[5] << 40) | ((u64)p[4] << 32) |
		       ((u64)p[3] << 24) | ((u64)p[2] << 16) |
		       ((u64)p[1] << 8) | p[0];
}

static forceinline machine_word_t
get_unaligned_leword(const u8 *p)
{
	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	if (WORDBITS == 32)
		return get_unaligned_le32(p);
	else
		return get_unaligned_le64(p);
}

/***** Unaligned stores *****/

static forceinline void
put_unaligned_le16(u16 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u16_unaligned(le16_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 0);
		p[1] = (u8)(v >> 8);
	}
}

static forceinline void
put_unaligned_be16(u16 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u16_unaligned(be16_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 8);
		p[1] = (u8)(v >> 0);
	}
}

static forceinline void
put_unaligned_le32(u32 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u32_unaligned(le32_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 0);
		p[1] = (u8)(v >> 8);
		p[2] = (u8)(v >> 16);
		p[3] = (u8)(v >> 24);
	}
}

static forceinline void
put_unaligned_be32(u32 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u32_unaligned(be32_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 24);
		p[1] = (u8)(v >> 16);
		p[2] = (u8)(v >> 8);
		p[3] = (u8)(v >> 0);
	}
}

static forceinline void
put_unaligned_le64(u64 v, u8 *p)
{
	if (UNALIGNED_ACCESS_IS_FAST) {
		store_u64_unaligned(le64_bswap(v), p);
	} else {
		p[0] = (u8)(v >> 0);
		p[1] = (u8)(v >> 8);
		p[2] = (u8)(v >> 16);
		p[3] = (u8)(v >> 24);
		p[4] = (u8)(v >> 32);
		p[5] = (u8)(v >> 40);
		p[6] = (u8)(v >> 48);
		p[7] = (u8)(v >> 56);
	}
}

static forceinline void
put_unaligned_leword(machine_word_t v, u8 *p)
{
	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	if (WORDBITS == 32)
		put_unaligned_le32(v, p);
	else
		put_unaligned_le64(v, p);
}

/***** 24-bit loads *****/

/*
 * Given a 32-bit value that was loaded with the platform's native endianness,
 * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
 * bits contain the first 3 bytes at the memory location from which the input
 * 32-bit value was loaded, arranged as octets in a platform-dependent order.
 */
static forceinline u32
loaded_u32_to_u24(u32 v)
{
	if (CPU_IS_LITTLE_ENDIAN())
		return v & 0xFFFFFF;
	else
		return v >> 8;
}

/*
 * Load the next 3 bytes from the memory location @p into the 24 low-order
 * bits of a 32-bit value.  The order in which the 3 bytes will be arranged
 * as octets in the 24 bits is platform-dependent.  At least
 * LOAD_U24_REQUIRED_NBYTES bytes must be available at @p; note that this may
 * be more than 3.
 */
static forceinline u32
load_u24_unaligned(const u8 *p)
{
#if UNALIGNED_ACCESS_IS_FAST
#  define LOAD_U24_REQUIRED_NBYTES 4
	return loaded_u32_to_u24(load_u32_unaligned(p));
#else
#  define LOAD_U24_REQUIRED_NBYTES 3
	if (CPU_IS_LITTLE_ENDIAN())
		return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
	else
		return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
#endif
}

#endif /* LIB_UNALIGNED_H */
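
/*
 * Usage sketch (an illustrative addition, not part of the original header):
 * the {get,put}_unaligned_* helpers read and write a fixed byte order
 * regardless of host endianness or pointer alignment, so a round trip
 * through a byte buffer is lossless on every platform.  "buf", "v", and "w"
 * are hypothetical names used only for this example.
 *
 *	u8 buf[1 + 4];
 *	u32 v = 0x12345678;
 *
 *	// Serialize as little-endian bytes, even at the odd offset buf + 1;
 *	// on targets where unaligned access is slow, the byte-by-byte
 *	// fallback path is used instead of a direct word store.
 *	put_unaligned_le32(v, buf + 1);
 *	// buf[1..4] now holds { 0x78, 0x56, 0x34, 0x12 } on all platforms.
 *
 *	u32 w = get_unaligned_le32(buf + 1);	// w == 0x12345678 again
 */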
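
/*
 * Note on load_u24_unaligned() (likewise an illustrative addition): the fast
 * path reads a full 32-bit word, so a caller must guarantee
 * LOAD_U24_REQUIRED_NBYTES readable bytes at @p even though only 3 bytes are
 * consumed.  A hedged sketch of a conforming call site, with "data" and
 * "hash" as hypothetical names:
 *
 *	u8 data[16];
 *	u32 hash = 0;
 *	size_t i = 0;
 *
 *	// Stop while at least LOAD_U24_REQUIRED_NBYTES bytes remain, which
 *	// may leave the final 1-3 positions unvisited on the fast path.
 *	while (i + LOAD_U24_REQUIRED_NBYTES <= sizeof(data)) {
 *		// The octet order inside the result is platform-dependent,
 *		// so it suits order-insensitive uses such as hashing, not
 *		// interpreting the bytes as a specific-endian integer.
 *		hash ^= load_u24_unaligned(&data[i]);
 *		i++;
 *	}
 */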