github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/compress/libdeflate/matchfinder_common.h

/*
 * matchfinder_common.h - common code for Lempel-Ziv matchfinding
 */

#ifndef LIB_MATCHFINDER_COMMON_H
#define LIB_MATCHFINDER_COMMON_H

#include "lib_common.h"
#include "unaligned.h"

#ifndef MATCHFINDER_WINDOW_ORDER
#  error "MATCHFINDER_WINDOW_ORDER must be defined!"
#endif

#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)

typedef s16 mf_pos_t;

#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)

#define MATCHFINDER_ALIGNMENT 8

#define arch_matchfinder_init(data, size)	false
#define arch_matchfinder_rebase(data, size)	false

#ifdef _aligned_attribute
#  if defined(__arm__) || defined(__aarch64__)
#    include "arm/matchfinder_impl.h"
#  elif defined(__i386__) || defined(__x86_64__)
#    include "matchfinder_impl.h"
#  endif
#endif

/*
 * Initialize the hash table portion of the matchfinder.
 *
 * Essentially, this is an optimized memset().
 *
 * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
 */
static forceinline void
matchfinder_init(mf_pos_t *data, size_t num_entries)
{
	size_t i;

	if (arch_matchfinder_init(data, num_entries * sizeof(data[0])))
		return;

	for (i = 0; i < num_entries; i++)
		data[i] = MATCHFINDER_INITVAL;
}
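
/*
 * Illustrative sketch (not part of libdeflate): a hypothetical hash-chain
 * style matchfinder that keeps a table of hash bucket heads plus a table of
 * per-position chain links.  The struct layout, table sizes, and names are
 * assumptions invented for this example; real matchfinders define their own
 * and must also arrange for MATCHFINDER_ALIGNMENT-aligned storage.
 */
struct example_matchfinder {
	mf_pos_t hash_tab[1UL << 15];		    /* heads of the hash chains  */
	mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE]; /* per-position chain links  */
};

static forceinline void
example_matchfinder_init(struct example_matchfinder *mf)
{
	/* The hash bucket heads are position-relative, so they all start at
	 * MATCHFINDER_INITVAL, i.e. saturated to "out of range".  Whether any
	 * other table needs initialization is up to the specific matchfinder;
	 * here the chain links are assumed to be written before being read.  */
	matchfinder_init(mf->hash_tab,
			 sizeof(mf->hash_tab) / sizeof(mf->hash_tab[0]));
}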

/*
 * Slide the matchfinder by WINDOW_SIZE bytes.
 *
 * This must be called just after each WINDOW_SIZE bytes have been run through
 * the matchfinder.
 *
 * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
 * The effect is that all entries are updated to be relative to the current
 * position, rather than the position WINDOW_SIZE bytes prior.
 *
 * Underflow is detected and replaced with signed saturation.  This ensures that
 * once the sliding window has passed over a position, that position forever
 * remains out of bounds.
 *
 * The array passed in must contain all matchfinder data that is
 * position-relative.  Concretely, this will include the hash table as well as
 * the table of positions that is used to link together the sequences in each
 * hash bucket.  Note that in the latter table, the links are 1-ary in the case
 * of "hash chains", and 2-ary in the case of "binary trees".  In either case,
 * the links need to be rebased in the same way.
 */
static forceinline void
matchfinder_rebase(mf_pos_t *data, size_t num_entries)
{
	size_t i;

	if (arch_matchfinder_rebase(data, num_entries * sizeof(data[0])))
		return;

	if (MATCHFINDER_WINDOW_SIZE == 32768) {
		/* Branchless version for 32768 byte windows.  If the value was
		 * already negative, clear all bits except the sign bit; this
		 * changes the value to -32768.  Otherwise, set the sign bit;
		 * this is equivalent to subtracting 32768.  */
		for (i = 0; i < num_entries; i++) {
			u16 v = data[i];
			u16 sign_bit = v & 0x8000;
			v &= sign_bit - ((sign_bit >> 15) ^ 1);
			v |= 0x8000;
			data[i] = v;
		}
		return;
	}

	for (i = 0; i < num_entries; i++) {
		if (data[i] >= 0)
			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
		else
			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
	}
}
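
/*
 * Illustrative sketch (not part of libdeflate): how a compressor loop might
 * call matchfinder_rebase() on the hypothetical example_matchfinder above,
 * once per MATCHFINDER_WINDOW_SIZE bytes of input.  Both position-relative
 * tables are rebased, per the comment above.  For a 32768-byte window, a
 * stored position of 100 becomes -32668 (still reachable, 32668 bytes back),
 * while a position of -5 saturates to -32768 and stays out of range forever.
 */
static forceinline void
example_matchfinder_slide_window(struct example_matchfinder *mf)
{
	matchfinder_rebase(mf->hash_tab,
			   sizeof(mf->hash_tab) / sizeof(mf->hash_tab[0]));
	matchfinder_rebase(mf->next_tab,
			   sizeof(mf->next_tab) / sizeof(mf->next_tab[0]));
}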

/*
 * The hash function: given a sequence prefix held in the low-order bits of a
 * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
 * bits of the product that don't fit in a 32-bit value, but take the
 * next-highest @num_bits bits of the product as the hash value, as those have
 * the most randomness.
 */
static forceinline u32
lz_hash(u32 seq, unsigned num_bits)
{
	return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
}
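
/*
 * Illustrative sketch (not part of libdeflate): hashing the 4 bytes beginning
 * at @p into one of 2^num_bits hash buckets.  The choice of a 4-byte sequence
 * prefix and the use of load_u32_unaligned() from "unaligned.h" are
 * assumptions made for this example; callers choose their own prefix length
 * and load helper.
 */
static forceinline u32
example_hash_next_4_bytes(const u8 *p, unsigned num_bits)
{
	return lz_hash(load_u32_unaligned(p), num_bits);
}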

/*
 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
 * to a maximum of @max_len.  Initially, @start_len bytes are matched.
 */
static forceinline unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const unsigned start_len, const unsigned max_len)
{
	unsigned len = start_len;
	machine_word_t v_word;

	if (UNALIGNED_ACCESS_IS_FAST) {

		if (likely(max_len - len >= 4 * WORDBYTES)) {

		#define COMPARE_WORD_STEP				\
			v_word = load_word_unaligned(&matchptr[len]) ^	\
				 load_word_unaligned(&strptr[len]);	\
			if (v_word != 0)				\
				goto word_differs;			\
			len += WORDBYTES;				\

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
		#undef COMPARE_WORD_STEP
		}

		while (len + WORDBYTES <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDBYTES;
		}
	}

	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	if (CPU_IS_LITTLE_ENDIAN())
		len += (bsfw(v_word) >> 3);
	else
		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
	return len;
}
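
/*
 * Illustrative sketch (not part of libdeflate): a typical lz_extend() call
 * site.  It assumes the caller has already verified (e.g. via a hash-table
 * lookup followed by a 4-byte compare) that the first 4 bytes at @matchptr
 * equal the first 4 bytes at @in_next, so the extension starts from
 * start_len == 4.  The names here are invented for this example.
 */
static forceinline unsigned
example_match_length(const u8 *in_next, const u8 *matchptr, unsigned max_len)
{
	return lz_extend(in_next, matchptr, 4, max_len);
}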

#endif /* LIB_MATCHFINDER_COMMON_H */