github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/compress/libdeflate/crc32_impl.h (about)

     1  #ifndef GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_CRC32_IMPL_H_
     2  #define GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_CRC32_IMPL_H_
     3  /*
     4   * x86/crc32_impl.h - x86 implementations of CRC-32 checksum algorithm
     5   *
     6   * Copyright 2016 Eric Biggers
     7   *
     8   * Permission is hereby granted, free of charge, to any person
     9   * obtaining a copy of this software and associated documentation
    10   * files (the "Software"), to deal in the Software without
    11   * restriction, including without limitation the rights to use,
    12   * copy, modify, merge, publish, distribute, sublicense, and/or sell
    13   * copies of the Software, and to permit persons to whom the
    14   * Software is furnished to do so, subject to the following
    15   * conditions:
    16   *
    17   * The above copyright notice and this permission notice shall be
    18   * included in all copies or substantial portions of the Software.
    19   *
    20   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    21   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    22   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    23   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    24   * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    25   * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    26   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    27   * OTHER DEALINGS IN THE SOFTWARE.
    28   */
    29  
    30  #include "cpu_features.h"
    31  
    32  /*
    33   * Include the PCLMUL/AVX implementation?  Although our PCLMUL-optimized CRC-32
    34   * function doesn't use any AVX intrinsics specifically, it can benefit a lot
    35   * from being compiled for an AVX target: on Skylake, ~16700 MB/s vs. ~10100
    36   * MB/s.  I expect this is related to the PCLMULQDQ instructions being assembled
    37   * in the newer three-operand form rather than the older two-operand form.
    38   *
    39   * Note: this is only needed if __AVX__ is *not* defined, since otherwise the
    40   * "regular" PCLMUL implementation would already be AVX enabled.
    41   */
    42  #undef DISPATCH_PCLMUL_AVX
    43  #if !defined(DEFAULT_IMPL) && !defined(__AVX__) &&	\
    44  	X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_AVX_TARGET &&	\
    45  	(defined(__PCLMUL__) || COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS)
    46  #  define FUNCNAME		crc32_pclmul_avx
    47  #  define FUNCNAME_ALIGNED	crc32_pclmul_avx_aligned
    48  #  define ATTRIBUTES		__attribute__((target("pclmul,avx")))
    49  #  define DISPATCH		1
    50  #  define DISPATCH_PCLMUL_AVX	1
    51  #  include "crc32_pclmul_template.h"
    52  #endif
    53  
    54  /* PCLMUL implementation */
    55  #undef DISPATCH_PCLMUL
    56  #if !defined(DEFAULT_IMPL) &&	\
    57  	(defined(__PCLMUL__) || (X86_CPU_FEATURES_ENABLED &&	\
    58  				 COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS))
    59  #  define FUNCNAME		crc32_pclmul
    60  #  define FUNCNAME_ALIGNED	crc32_pclmul_aligned
    61  #  ifdef __PCLMUL__
    62  #    define ATTRIBUTES
    63  #    define DEFAULT_IMPL	crc32_pclmul
    64  #  else
    65  #    define ATTRIBUTES		__attribute__((target("pclmul")))
    66  #    define DISPATCH		1
    67  #    define DISPATCH_PCLMUL	1
    68  #  endif
    69  #  include "crc32_pclmul_template.h"
    70  #endif
    71  
    72  #ifdef DISPATCH
    73  static inline crc32_func_t
    74  arch_select_crc32_func(void)
    75  {
    76  	u32 features = get_cpu_features();
    77  
    78  #ifdef DISPATCH_PCLMUL_AVX
    79  	if ((features & X86_CPU_FEATURE_PCLMULQDQ) &&
    80  	    (features & X86_CPU_FEATURE_AVX))
    81  		return crc32_pclmul_avx;
    82  #endif
    83  #ifdef DISPATCH_PCLMUL
    84  	if (features & X86_CPU_FEATURE_PCLMULQDQ)
    85  		return crc32_pclmul;
    86  #endif
    87  	return NULL;
    88  }
    89  #endif /* DISPATCH */
    90  
    91  #endif  // GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_CRC32_IMPL_H_