github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/compress/libdeflate/matchfinder_impl.h (about) 1 #ifndef GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_MATCHFINDER_IMPL_H_ 2 #define GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_MATCHFINDER_IMPL_H_ 3 /* 4 * x86/matchfinder_impl.h - x86 implementations of matchfinder functions 5 * 6 * Copyright 2016 Eric Biggers 7 * 8 * Permission is hereby granted, free of charge, to any person 9 * obtaining a copy of this software and associated documentation 10 * files (the "Software"), to deal in the Software without 11 * restriction, including without limitation the rights to use, 12 * copy, modify, merge, publish, distribute, sublicense, and/or sell 13 * copies of the Software, and to permit persons to whom the 14 * Software is furnished to do so, subject to the following 15 * conditions: 16 * 17 * The above copyright notice and this permission notice shall be 18 * included in all copies or substantial portions of the Software. 19 * 20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 22 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27 * OTHER DEALINGS IN THE SOFTWARE. 28 */ 29 30 #ifdef __AVX2__ 31 # if MATCHFINDER_ALIGNMENT < 32 32 # undef MATCHFINDER_ALIGNMENT 33 # define MATCHFINDER_ALIGNMENT 32 34 # endif 35 # include <immintrin.h> 36 static forceinline bool 37 matchfinder_init_avx2(mf_pos_t *data, size_t size) 38 { 39 __m256i v, *p; 40 size_t n; 41 42 if (size % (sizeof(__m256i) * 4) != 0) 43 return false; 44 45 STATIC_ASSERT(sizeof(mf_pos_t) == 2); 46 v = _mm256_set1_epi16(MATCHFINDER_INITVAL); 47 p = (__m256i *)data; 48 n = size / (sizeof(__m256i) * 4); 49 do { 50 p[0] = v; 51 p[1] = v; 52 p[2] = v; 53 p[3] = v; 54 p += 4; 55 } while (--n); 56 return true; 57 } 58 59 static forceinline bool 60 matchfinder_rebase_avx2(mf_pos_t *data, size_t size) 61 { 62 __m256i v, *p; 63 size_t n; 64 65 if (size % (sizeof(__m256i) * 4) != 0) 66 return false; 67 68 STATIC_ASSERT(sizeof(mf_pos_t) == 2); 69 v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); 70 p = (__m256i *)data; 71 n = size / (sizeof(__m256i) * 4); 72 do { 73 /* PADDSW: Add Packed Signed Integers With Signed Saturation */ 74 p[0] = _mm256_adds_epi16(p[0], v); 75 p[1] = _mm256_adds_epi16(p[1], v); 76 p[2] = _mm256_adds_epi16(p[2], v); 77 p[3] = _mm256_adds_epi16(p[3], v); 78 p += 4; 79 } while (--n); 80 return true; 81 } 82 #endif /* __AVX2__ */ 83 84 #ifdef __SSE2__ 85 # if MATCHFINDER_ALIGNMENT < 16 86 # undef MATCHFINDER_ALIGNMENT 87 # define MATCHFINDER_ALIGNMENT 16 88 # endif 89 # include <emmintrin.h> 90 static forceinline bool 91 matchfinder_init_sse2(mf_pos_t *data, size_t size) 92 { 93 __m128i v, *p; 94 size_t n; 95 96 if (size % (sizeof(__m128i) * 4) != 0) 97 return false; 98 99 STATIC_ASSERT(sizeof(mf_pos_t) == 2); 100 v = _mm_set1_epi16(MATCHFINDER_INITVAL); 101 p = (__m128i *)data; 102 n = size / (sizeof(__m128i) * 4); 103 do { 104 p[0] = v; 105 p[1] = v; 106 p[2] = v; 107 p[3] = v; 108 p += 4; 109 } while (--n); 110 return true; 111 } 112 113 static forceinline bool 114 matchfinder_rebase_sse2(mf_pos_t *data, size_t size) 115 { 116 __m128i v, *p; 117 size_t n; 118 119 if (size % (sizeof(__m128i) * 4) != 0) 120 return false; 121 122 STATIC_ASSERT(sizeof(mf_pos_t) == 2); 123 v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); 124 p = (__m128i *)data; 125 n = size / (sizeof(__m128i) * 4); 126 do { 127 /* PADDSW: Add Packed Signed Integers With Signed Saturation */ 128 p[0] = _mm_adds_epi16(p[0], v); 129 p[1] = _mm_adds_epi16(p[1], v); 130 p[2] = _mm_adds_epi16(p[2], v); 131 p[3] = _mm_adds_epi16(p[3], v); 132 p += 4; 133 } while (--n); 134 return true; 135 } 136 #endif /* __SSE2__ */ 137 138 #undef arch_matchfinder_init 139 static forceinline bool 140 arch_matchfinder_init(mf_pos_t *data, size_t size) 141 { 142 #ifdef __AVX2__ 143 if (matchfinder_init_avx2(data, size)) 144 return true; 145 #endif 146 #ifdef __SSE2__ 147 if (matchfinder_init_sse2(data, size)) 148 return true; 149 #endif 150 return false; 151 } 152 153 #undef arch_matchfinder_rebase 154 static forceinline bool 155 arch_matchfinder_rebase(mf_pos_t *data, size_t size) 156 { 157 #ifdef __AVX2__ 158 if (matchfinder_rebase_avx2(data, size)) 159 return true; 160 #endif 161 #ifdef __SSE2__ 162 if (matchfinder_rebase_sse2(data, size)) 163 return true; 164 #endif 165 return false; 166 } 167 168 #endif // GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_MATCHFINDER_IMPL_H_ NOLINT(whitespace/line_length)