github.com/grailbio/base@v0.0.11/compress/libdeflate/matchfinder_common.h

/*
 * matchfinder_common.h - common code for Lempel-Ziv matchfinding
 */

#ifndef LIB_MATCHFINDER_COMMON_H
#define LIB_MATCHFINDER_COMMON_H

#include "lib_common.h"
#include "unaligned.h"

#ifndef MATCHFINDER_WINDOW_ORDER
# error "MATCHFINDER_WINDOW_ORDER must be defined!"
#endif

#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)

typedef s16 mf_pos_t;

#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)

#define MATCHFINDER_ALIGNMENT 8

#define arch_matchfinder_init(data, size) false
#define arch_matchfinder_rebase(data, size) false

#ifdef _aligned_attribute
# if defined(__arm__) || defined(__aarch64__)
#  include "arm/matchfinder_impl.h"
# elif defined(__i386__) || defined(__x86_64__)
#  include "matchfinder_impl.h"
# endif
#endif

/*
 * Initialize the hash table portion of the matchfinder.
 *
 * Essentially, this is an optimized memset().
 *
 * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
 */
static forceinline void
matchfinder_init(mf_pos_t *data, size_t num_entries)
{
	size_t i;

	if (arch_matchfinder_init(data, num_entries * sizeof(data[0])))
		return;

	for (i = 0; i < num_entries; i++)
		data[i] = MATCHFINDER_INITVAL;
}

/*
 * Slide the matchfinder by WINDOW_SIZE bytes.
 *
 * This must be called just after each WINDOW_SIZE bytes have been run through
 * the matchfinder.
 *
 * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
 * The effect is that all entries are updated to be relative to the current
 * position, rather than the position WINDOW_SIZE bytes prior.
 *
 * Underflow is detected and replaced with signed saturation.  This ensures
 * that once the sliding window has passed over a position, that position
 * forever remains out of bounds.
 *
 * The array passed in must contain all matchfinder data that is
 * position-relative.  Concretely, this will include the hash table as well as
 * the table of positions that is used to link together the sequences in each
 * hash bucket.  Note that in the latter table, the links are 1-ary in the
 * case of "hash chains", and 2-ary in the case of "binary trees".  In either
 * case, the links need to be rebased in the same way.
 */
static forceinline void
matchfinder_rebase(mf_pos_t *data, size_t num_entries)
{
	size_t i;

	if (arch_matchfinder_rebase(data, num_entries * sizeof(data[0])))
		return;

	if (MATCHFINDER_WINDOW_SIZE == 32768) {
		/* Branchless version for 32768-byte windows.  If the value
		 * was already negative, clear all bits except the sign bit;
		 * this changes the value to -32768.  Otherwise, set the sign
		 * bit; this is equivalent to subtracting 32768. */
		for (i = 0; i < num_entries; i++) {
			u16 v = data[i];
			u16 sign_bit = v & 0x8000;

			v &= sign_bit - ((sign_bit >> 15) ^ 1);
			v |= 0x8000;
			data[i] = v;
		}
		return;
	}

	for (i = 0; i < num_entries; i++) {
		if (data[i] >= 0)
			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
		else
			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
	}
}
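
/*
 * Illustrative sketch (not part of the original file): a minimal example of
 * the saturating rebase semantics above, assuming MATCHFINDER_WINDOW_ORDER is
 * 15 so the window is 32768 bytes.  An in-window position such as 100 is
 * rebased to 100 - 32768 = -32668, while an already-expired position such as
 * -1 would underflow s16 and therefore saturates to -32768, keeping it out of
 * bounds forever.  Guarded with #if 0 so it is never compiled.
 */
#if 0
static void
matchfinder_rebase_example(void)
{
	mf_pos_t table[2] = { 100, -1 };

	matchfinder_rebase(table, 2);
	/* Now: table[0] == -32668 (100 - 32768)          */
	/*      table[1] == -32768 (saturated, not -32769) */
}
#endif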

/*
 * The hash function: given a sequence prefix held in the low-order bits of a
 * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
 * bits of the product that don't fit in a 32-bit value, but take the
 * next-highest @num_bits bits of the product as the hash value, as those have
 * the most randomness.
 */
static forceinline u32
lz_hash(u32 seq, unsigned num_bits)
{
	return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
}

/*
 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
 * to a maximum of @max_len.  Initially, @start_len bytes are matched.
 */
static forceinline unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const unsigned start_len, const unsigned max_len)
{
	unsigned len = start_len;
	machine_word_t v_word;

	if (UNALIGNED_ACCESS_IS_FAST) {

		if (likely(max_len - len >= 4 * WORDBYTES)) {

#define COMPARE_WORD_STEP					\
			v_word = load_word_unaligned(&matchptr[len]) ^	\
				 load_word_unaligned(&strptr[len]);	\
			if (v_word != 0)				\
				goto word_differs;			\
			len += WORDBYTES;				\

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
#undef COMPARE_WORD_STEP
		}

		while (len + WORDBYTES <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDBYTES;
		}
	}

	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	if (CPU_IS_LITTLE_ENDIAN())
		len += (bsfw(v_word) >> 3);
	else
		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
	return len;
}

#endif /* LIB_MATCHFINDER_COMMON_H */
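
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * fragment showing how a matchfinder might combine lz_hash() and lz_extend().
 * The names find_match_example, in, cur_pos, in_end, hash_table, and
 * EXAMPLE_HASH_ORDER are invented for this example, not identifiers from this
 * library, and table entries are treated as absolute offsets into 'in' -- a
 * simplification that only holds for buffers no longer than one window.
 * Guarded with #if 0 so it is never compiled.
 */
#if 0
#define EXAMPLE_HASH_ORDER 15	/* 2^15-entry hash table, an example value */

static unsigned
find_match_example(const u8 *in, size_t cur_pos, size_t in_end,
		   const mf_pos_t *hash_table)
{
	/* Hash the 3-byte sequence prefix at the current position, packed
	 * endian-independently into the low-order bits of a 32-bit value. */
	u32 seq = ((u32)in[cur_pos] << 16) |
		  ((u32)in[cur_pos + 1] << 8) |
		  (u32)in[cur_pos + 2];
	u32 hash = lz_hash(seq, EXAMPLE_HASH_ORDER);
	mf_pos_t candidate = hash_table[hash];

	/* Negative entries are empty buckets (MATCHFINDER_INITVAL) or
	 * positions the sliding window has already passed over. */
	if (candidate < 0)
		return 0;

	/* Count matching bytes; start_len is 0 because no bytes have been
	 * verified yet (the hash may have collided). */
	return lz_extend(&in[cur_pos], &in[(size_t)candidate], 0,
			 (unsigned)(in_end - cur_pos));
}
#endif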