github.com/grailbio/base@v0.0.11/compress/libdeflate/matchfinder_impl.h (about)

     1  #ifndef GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_MATCHFINDER_IMPL_H_
     2  #define GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_MATCHFINDER_IMPL_H_
     3  /*
     4   * x86/matchfinder_impl.h - x86 implementations of matchfinder functions
     5   *
     6   * Copyright 2016 Eric Biggers
     7   *
     8   * Permission is hereby granted, free of charge, to any person
     9   * obtaining a copy of this software and associated documentation
    10   * files (the "Software"), to deal in the Software without
    11   * restriction, including without limitation the rights to use,
    12   * copy, modify, merge, publish, distribute, sublicense, and/or sell
    13   * copies of the Software, and to permit persons to whom the
    14   * Software is furnished to do so, subject to the following
    15   * conditions:
    16   *
    17   * The above copyright notice and this permission notice shall be
    18   * included in all copies or substantial portions of the Software.
    19   *
    20   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    21   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    22   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    23   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    24   * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    25   * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    26   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    27   * OTHER DEALINGS IN THE SOFTWARE.
    28   */
    29  
    30  #ifdef __AVX2__
    31  #  if MATCHFINDER_ALIGNMENT < 32
    32  #    undef MATCHFINDER_ALIGNMENT
    33  #    define MATCHFINDER_ALIGNMENT 32
    34  #  endif
    35  #  include <immintrin.h>
    36  static forceinline bool
    37  matchfinder_init_avx2(mf_pos_t *data, size_t size)
    38  {
    39  	__m256i v, *p;
    40  	size_t n;
    41  
    42  	if (size % (sizeof(__m256i) * 4) != 0)
    43  		return false;
    44  
    45  	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
    46  	v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
    47  	p = (__m256i *)data;
    48  	n = size / (sizeof(__m256i) * 4);
    49  	do {
    50  		p[0] = v;
    51  		p[1] = v;
    52  		p[2] = v;
    53  		p[3] = v;
    54  		p += 4;
    55  	} while (--n);
    56  	return true;
    57  }
    58  
    59  static forceinline bool
    60  matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
    61  {
    62  	__m256i v, *p;
    63  	size_t n;
    64  
    65  	if (size % (sizeof(__m256i) * 4) != 0)
    66  		return false;
    67  
    68  	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
    69  	v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
    70  	p = (__m256i *)data;
    71  	n = size / (sizeof(__m256i) * 4);
    72  	do {
    73  		/* PADDSW: Add Packed Signed Integers With Signed Saturation  */
    74  		p[0] = _mm256_adds_epi16(p[0], v);
    75  		p[1] = _mm256_adds_epi16(p[1], v);
    76  		p[2] = _mm256_adds_epi16(p[2], v);
    77  		p[3] = _mm256_adds_epi16(p[3], v);
    78  		p += 4;
    79  	} while (--n);
    80  	return true;
    81  }
    82  #endif /* __AVX2__ */
    83  
    84  #ifdef __SSE2__
    85  #  if MATCHFINDER_ALIGNMENT < 16
    86  #    undef MATCHFINDER_ALIGNMENT
    87  #    define MATCHFINDER_ALIGNMENT 16
    88  #  endif
    89  #  include <emmintrin.h>
    90  static forceinline bool
    91  matchfinder_init_sse2(mf_pos_t *data, size_t size)
    92  {
    93  	__m128i v, *p;
    94  	size_t n;
    95  
    96  	if (size % (sizeof(__m128i) * 4) != 0)
    97  		return false;
    98  
    99  	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
   100  	v = _mm_set1_epi16(MATCHFINDER_INITVAL);
   101  	p = (__m128i *)data;
   102  	n = size / (sizeof(__m128i) * 4);
   103  	do {
   104  		p[0] = v;
   105  		p[1] = v;
   106  		p[2] = v;
   107  		p[3] = v;
   108  		p += 4;
   109  	} while (--n);
   110  	return true;
   111  }
   112  
   113  static forceinline bool
   114  matchfinder_rebase_sse2(mf_pos_t *data, size_t size)
   115  {
   116  	__m128i v, *p;
   117  	size_t n;
   118  
   119  	if (size % (sizeof(__m128i) * 4) != 0)
   120  		return false;
   121  
   122  	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
   123  	v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
   124  	p = (__m128i *)data;
   125  	n = size / (sizeof(__m128i) * 4);
   126  	do {
   127  		/* PADDSW: Add Packed Signed Integers With Signed Saturation  */
   128  		p[0] = _mm_adds_epi16(p[0], v);
   129  		p[1] = _mm_adds_epi16(p[1], v);
   130  		p[2] = _mm_adds_epi16(p[2], v);
   131  		p[3] = _mm_adds_epi16(p[3], v);
   132  		p += 4;
   133  	} while (--n);
   134  	return true;
   135  }
   136  #endif /* __SSE2__ */
   137  
   138  #undef arch_matchfinder_init
   139  static forceinline bool
   140  arch_matchfinder_init(mf_pos_t *data, size_t size)
   141  {
   142  #ifdef __AVX2__
   143  	if (matchfinder_init_avx2(data, size))
   144  		return true;
   145  #endif
   146  #ifdef __SSE2__
   147  	if (matchfinder_init_sse2(data, size))
   148  		return true;
   149  #endif
   150  	return false;
   151  }
   152  
   153  #undef arch_matchfinder_rebase
   154  static forceinline bool
   155  arch_matchfinder_rebase(mf_pos_t *data, size_t size)
   156  {
   157  #ifdef __AVX2__
   158  	if (matchfinder_rebase_avx2(data, size))
   159  		return true;
   160  #endif
   161  #ifdef __SSE2__
   162  	if (matchfinder_rebase_sse2(data, size))
   163  		return true;
   164  #endif
   165  	return false;
   166  }
   167  
   168  #endif  // GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_MATCHFINDER_IMPL_H_  NOLINT(whitespace/line_length)