github.com/grailbio/base@v0.0.11/compress/libdeflate/bt_matchfinder.h (about)

     1  #ifndef GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_BT_MATCHFINDER_H_
     2  #define GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_BT_MATCHFINDER_H_
     3  /*
     4   * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
     5   *
     6   * Originally public domain; changes after 2016-09-07 are copyrighted.
     7   *
     8   * Copyright 2016 Eric Biggers
     9   *
    10   * Permission is hereby granted, free of charge, to any person
    11   * obtaining a copy of this software and associated documentation
    12   * files (the "Software"), to deal in the Software without
    13   * restriction, including without limitation the rights to use,
    14   * copy, modify, merge, publish, distribute, sublicense, and/or sell
    15   * copies of the Software, and to permit persons to whom the
    16   * Software is furnished to do so, subject to the following
    17   * conditions:
    18   *
    19   * The above copyright notice and this permission notice shall be
    20   * included in all copies or substantial portions of the Software.
    21   *
    22   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    23   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    24   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    25   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    26   * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    27   * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    28   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    29   * OTHER DEALINGS IN THE SOFTWARE.
    30   *
    31   * ----------------------------------------------------------------------------
    32   *
    33   * This is a Binary Trees (bt) based matchfinder.
    34   *
    35   * The main data structure is a hash table where each hash bucket contains a
    36   * binary tree of sequences whose first 4 bytes share the same hash code.  Each
    37   * sequence is identified by its starting position in the input buffer.  Each
    38   * binary tree is always sorted such that each left child represents a sequence
    39   * lexicographically lesser than its parent and each right child represents a
    40   * sequence lexicographically greater than its parent.
    41   *
    42   * The algorithm processes the input buffer sequentially.  At each byte
    43   * position, the hash code of the first 4 bytes of the sequence beginning at
    44   * that position (the sequence being matched against) is computed.  This
    45   * identifies the hash bucket to use for that position.  Then, a new binary tree
    46   * node is created to represent the current sequence.  Then, in a single tree
    47   * traversal, the hash bucket's binary tree is searched for matches and is
    48   * re-rooted at the new node.
    49   *
    50   * Compared to the simpler algorithm that uses linked lists instead of binary
    51   * trees (see hc_matchfinder.h), the binary tree version gains more information
    52   * at each node visitation.  Ideally, the binary tree version will examine only
    53   * 'log(n)' nodes to find the same matches that the linked list version will
    54   * find by examining 'n' nodes.  In addition, the binary tree version can
    55   * examine fewer bytes at each node by taking advantage of the common prefixes
    56   * that result from the sort order, whereas the linked list version may have to
    57   * examine up to the full length of the match at each node.
    58   *
    59   * However, it is not always best to use the binary tree version.  It requires
    60   * nearly twice as much memory as the linked list version, and it takes time to
    61   * keep the binary trees sorted, even at positions where the compressor does not
    62   * need matches.  Generally, when doing fast compression on small buffers,
    63   * binary trees are the wrong approach.  They are best suited for thorough
    64   * compression and/or large buffers.
    65   *
    66   * ----------------------------------------------------------------------------
    67   */
    68  
    69  
    70  #include "matchfinder_common.h"
    71  
/*
 * Sizing of the hash tables in 'struct bt_matchfinder':
 *
 * BT_MATCHFINDER_HASH3_ORDER
 *	log2 of the number of buckets in hash3_tab; also the bit width passed
 *	to lz_hash() when hashing 3-byte sequences.
 * BT_MATCHFINDER_HASH3_WAYS
 *	Number of positions stored in each hash3_tab bucket.  The matchfinder
 *	code statically asserts that this is 1 or 2.
 * BT_MATCHFINDER_HASH4_ORDER
 *	log2 of the number of entries in hash4_tab; also the bit width passed
 *	to lz_hash() when hashing 4-byte sequences.
 */
#define BT_MATCHFINDER_HASH3_ORDER 16
#define BT_MATCHFINDER_HASH3_WAYS  2
#define BT_MATCHFINDER_HASH4_ORDER 16

/* Combined number of mf_pos_t entries in hash3_tab and hash4_tab.  This is the
 * entry count that bt_matchfinder_init() passes to matchfinder_init().  */
#define BT_MATCHFINDER_TOTAL_HASH_LENGTH		\
	((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
	 (1UL << BT_MATCHFINDER_HASH4_ORDER))
    79  
/*
 * Representation of a match found by the bt_matchfinder.
 *
 * bt_matchfinder_advance_one_byte() fills in one of these for every match it
 * records.
 */
struct lz_match {

	/* The number of bytes matched.  */
	u16 length;

	/* The offset back from the current position that was matched, i.e.
	 * the current position minus the start of the matching sequence.  */
	u16 offset;
};
    89  
/*
 * State of the binary tree matchfinder.
 *
 * Every member is an array of mf_pos_t.  bt_matchfinder_init() and
 * bt_matchfinder_slide_window() depend on that: they cast the structure to
 * 'mf_pos_t *' and treat it as one flat array.  bt_matchfinder_init() passes
 * only the first BT_MATCHFINDER_TOTAL_HASH_LENGTH entries to
 * matchfinder_init(), which is exactly the two hash tables, so these must
 * remain the leading members.
 *
 * When _aligned_attribute is defined, the structure is declared with
 * MATCHFINDER_ALIGNMENT alignment.
 */
struct bt_matchfinder {

	/* The hash table for finding length 3 matches.  Each bucket holds
	 * BT_MATCHFINDER_HASH3_WAYS positions; way 0 is the most recently
	 * inserted one.  */
	mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];

	/* The hash table which contains the roots of the binary trees for
	 * finding length 4+ matches  */
	mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];

	/* The child node references for the binary trees.  The left and right
	 * children of the node for the sequence with position 'pos' are
	 * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively,
	 * where 'pos' is first masked with 'MATCHFINDER_WINDOW_SIZE - 1' (see
	 * bt_left_child() and bt_right_child()).  */
	mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];

}
#ifdef _aligned_attribute
_aligned_attribute(MATCHFINDER_ALIGNMENT)
#endif
;
   109  
   110  /* Prepare the matchfinder for a new input buffer.  */
   111  static forceinline void
   112  bt_matchfinder_init(struct bt_matchfinder *mf)
   113  {
   114  	matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_LENGTH);
   115  }
   116  
   117  static forceinline void
   118  bt_matchfinder_slide_window(struct bt_matchfinder *mf)
   119  {
   120  	matchfinder_rebase((mf_pos_t *)mf,
   121  			   sizeof(struct bt_matchfinder) / sizeof(mf_pos_t));
   122  }
   123  
   124  static forceinline mf_pos_t *
   125  bt_left_child(struct bt_matchfinder *mf, s32 node)
   126  {
   127  	return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
   128  }
   129  
   130  static forceinline mf_pos_t *
   131  bt_right_child(struct bt_matchfinder *mf, s32 node)
   132  {
   133  	return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
   134  }
   135  
/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
 * and of 'nice_len' for bt_matchfinder_skip_position() (which passes its
 * 'nice_len' on as the maximum length).  There must be sufficiently many bytes
 * remaining to load a 32-bit integer from the *next* position: 1 byte for the
 * current position plus the 4 bytes read starting at the next one.  */
#define BT_MATCHFINDER_REQUIRED_NBYTES	5
   140  
/*
 * Advance the binary tree matchfinder by one byte, optionally recording
 * matches.  @record_matches should be a compile-time constant.
 *
 * This is the shared implementation behind bt_matchfinder_get_matches()
 * (@record_matches true) and bt_matchfinder_skip_position() (@record_matches
 * false).  In both modes it inserts the sequence at @in_base + @cur_pos into
 * its hash3 bucket and re-roots the binary tree of its hash4 bucket at the new
 * node.
 *
 * On entry @next_hashes[0] and @next_hashes[1] supply the hash3 and hash4
 * codes of the current position; on return they hold the codes of the next
 * position.
 *
 * When @record_matches is false, @lz_matchptr is never dereferenced and is
 * returned unchanged, and 3 is stored in *@best_len_ret.
 *
 * Returns a pointer to the slot just past the last match recorded.
 */
static forceinline struct lz_match *
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
				const u8 * const restrict in_base,
				const ptrdiff_t cur_pos,
				const u32 max_len,
				const u32 nice_len,
				const u32 max_search_depth,
				u32 * const restrict next_hashes,
				u32 * const restrict best_len_ret,
				struct lz_match * restrict lz_matchptr,
				const bool record_matches)
{
	const u8 *in_next = in_base + cur_pos;
	u32 depth_remaining = max_search_depth;
	/* Positions <= cutoff are treated as being outside the window.  */
	const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
	u32 next_hashseq;
	u32 hash3;
	u32 hash4;
	s32 cur_node;
#if BT_MATCHFINDER_HASH3_WAYS >= 2
	s32 cur_node_2;
#endif
	const u8 *matchptr;
	mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
	u32 best_lt_len, best_gt_len;
	u32 len;
	/* Only matches longer than this are recorded by the tree search.  */
	u32 best_len = 3;

	STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
		      BT_MATCHFINDER_HASH3_WAYS <= 2);

	/* Read the 4 bytes that start at the *next* position.  This read is
	 * the reason callers must guarantee BT_MATCHFINDER_REQUIRED_NBYTES.  */
	next_hashseq = get_unaligned_le32(in_next + 1);

	/* The hash codes for the current position were computed earlier.  */
	hash3 = next_hashes[0];
	hash4 = next_hashes[1];

	/* Compute the hash codes for the next position and start fetching the
	 * buckets they select.  */
	next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
	next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
	prefetchw(&mf->hash3_tab[next_hashes[0]]);
	prefetchw(&mf->hash4_tab[next_hashes[1]]);

	/* Insert the current position into its hash3 bucket: it becomes way 0
	 * and the previous way 0 entry is demoted to way 1.  */
	cur_node = mf->hash3_tab[hash3][0];
	mf->hash3_tab[hash3][0] = cur_pos;
#if BT_MATCHFINDER_HASH3_WAYS >= 2
	cur_node_2 = mf->hash3_tab[hash3][1];
	mf->hash3_tab[hash3][1] = cur_node;
#endif
	/* Record at most one length 3 match, preferring the way 0 candidate.
	 * The 3 bytes are compared explicitly before the match is recorded.  */
	if (record_matches && cur_node > cutoff) {
		u32 seq3 = load_u24_unaligned(in_next);
		if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
			lz_matchptr->length = 3;
			lz_matchptr->offset = in_next - &in_base[cur_node];
			lz_matchptr++;
		}
	#if BT_MATCHFINDER_HASH3_WAYS >= 2
		else if (cur_node_2 > cutoff &&
			seq3 == load_u24_unaligned(&in_base[cur_node_2]))
		{
			lz_matchptr->length = 3;
			lz_matchptr->offset = in_next - &in_base[cur_node_2];
			lz_matchptr++;
		}
	#endif
	}

	/* Make the current position the root of its hash4 bucket's tree.
	 * cur_node is now the previous root.  */
	cur_node = mf->hash4_tab[hash4];
	mf->hash4_tab[hash4] = cur_pos;

	/* The pending pointers address the child links that still have to be
	 * filled in.  Initially these are the two children of the new root.  */
	pending_lt_ptr = bt_left_child(mf, cur_pos);
	pending_gt_ptr = bt_right_child(mf, cur_pos);

	/* The previous root is not inside the window: the new root gets no
	 * children and there is nothing to search.  */
	if (cur_node <= cutoff) {
		*pending_lt_ptr = MATCHFINDER_INITVAL;
		*pending_gt_ptr = MATCHFINDER_INITVAL;
		*best_len_ret = best_len;
		return lz_matchptr;
	}

	/* best_lt_len / best_gt_len: the match length at the node most
	 * recently linked on the lesser / greater side.
	 * len: the offset at which the comparison with the next node starts.  */
	best_lt_len = 0;
	best_gt_len = 0;
	len = 0;

	for (;;) {
		matchptr = &in_base[cur_node];

		/* NOTE(review): lz_extend() is defined elsewhere; it is used
		 * here as returning the match length, continuing from
		 * 'len + 1' and limited by max_len.  */
		if (matchptr[len] == in_next[len]) {
			len = lz_extend(in_next, matchptr, len + 1, max_len);
			if (!record_matches || len > best_len) {
				if (record_matches) {
					best_len = len;
					lz_matchptr->length = len;
					lz_matchptr->offset = in_next - matchptr;
					lz_matchptr++;
				}
				/* Long enough: stop searching.  The new root
				 * takes over both children of the matched
				 * node, so the matched node itself is no
				 * longer linked from the path just walked.  */
				if (len >= nice_len) {
					*pending_lt_ptr = *bt_left_child(mf, cur_node);
					*pending_gt_ptr = *bt_right_child(mf, cur_node);
					*best_len_ret = best_len;
					return lz_matchptr;
				}
			}
		}

		if (matchptr[len] < in_next[len]) {
			/* The sequence at cur_node sorts before the current
			 * one: attach it on the lesser side and continue with
			 * its right child.  The next comparison resumes at the
			 * smaller of the two side lengths.  */
			*pending_lt_ptr = cur_node;
			pending_lt_ptr = bt_right_child(mf, cur_node);
			cur_node = *pending_lt_ptr;
			best_lt_len = len;
			if (best_gt_len < len)
				len = best_gt_len;
		} else {
			/* Mirror image: attach cur_node on the greater side and
			 * continue with its left child.  */
			*pending_gt_ptr = cur_node;
			pending_gt_ptr = bt_left_child(mf, cur_node);
			cur_node = *pending_gt_ptr;
			best_gt_len = len;
			if (best_lt_len < len)
				len = best_lt_len;
		}

		/* Stop when the next node is outside the window or the search
		 * depth is used up; both pending links are terminated.  */
		if (cur_node <= cutoff || !--depth_remaining) {
			*pending_lt_ptr = MATCHFINDER_INITVAL;
			*pending_gt_ptr = MATCHFINDER_INITVAL;
			*best_len_ret = best_len;
			return lz_matchptr;
		}
	}
}
   270  
   271  /*
   272   * Retrieve a list of matches with the current position.
   273   *
   274   * @mf
   275   *	The matchfinder structure.
   276   * @in_base
   277   *	Pointer to the next byte in the input buffer to process _at the last
   278   *	time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
   279   * @cur_pos
   280   *	The current position in the input buffer relative to @in_base (the
   281   *	position of the sequence being matched against).
   282   * @max_len
   283   *	The maximum permissible match length at this position.  Must be >=
   284   *	BT_MATCHFINDER_REQUIRED_NBYTES.
   285   * @nice_len
   286   *	Stop searching if a match of at least this length is found.
   287   *	Must be <= @max_len.
   288   * @max_search_depth
   289   *	Limit on the number of potential matches to consider.  Must be >= 1.
   290   * @next_hashes
   291   *	The precomputed hash codes for the sequence beginning at @in_next.
   292   *	These will be used and then updated with the precomputed hashcodes for
   293   *	the sequence beginning at @in_next + 1.
   294   * @best_len_ret
   295   *	If a match of length >= 4 was found, then the length of the longest such
   296   *	match is written here; otherwise 3 is written here.  (Note: this is
   297   *	redundant with the 'struct lz_match' array, but this is easier for the
   298   *	compiler to optimize when inlined and the caller immediately does a
   299   *	check against 'best_len'.)
   300   * @lz_matchptr
   301   *	An array in which this function will record the matches.  The recorded
   302   *	matches will be sorted by strictly increasing length and (non-strictly)
   303   *	increasing offset.  The maximum number of matches that may be found is
   304   *	'nice_len - 2'.
   305   *
   306   * The return value is a pointer to the next available slot in the @lz_matchptr
   307   * array.  (If no matches were found, this will be the same as @lz_matchptr.)
   308   */
   309  static forceinline struct lz_match *
   310  bt_matchfinder_get_matches(struct bt_matchfinder *mf,
   311  			   const u8 *in_base,
   312  			   ptrdiff_t cur_pos,
   313  			   u32 max_len,
   314  			   u32 nice_len,
   315  			   u32 max_search_depth,
   316  			   u32 next_hashes[2],
   317  			   u32 *best_len_ret,
   318  			   struct lz_match *lz_matchptr)
   319  {
   320  	return bt_matchfinder_advance_one_byte(mf,
   321  					       in_base,
   322  					       cur_pos,
   323  					       max_len,
   324  					       nice_len,
   325  					       max_search_depth,
   326  					       next_hashes,
   327  					       best_len_ret,
   328  					       lz_matchptr,
   329  					       true);
   330  }
   331  
   332  /*
   333   * Advance the matchfinder, but don't record any matches.
   334   *
   335   * This is very similar to bt_matchfinder_get_matches() because both functions
   336   * must do hashing and tree re-rooting.
   337   */
   338  static forceinline void
   339  bt_matchfinder_skip_position(struct bt_matchfinder *mf,
   340  			     const u8 *in_base,
   341  			     ptrdiff_t cur_pos,
   342  			     u32 nice_len,
   343  			     u32 max_search_depth,
   344  			     u32 next_hashes[2])
   345  {
   346  	u32 best_len;
   347  	bt_matchfinder_advance_one_byte(mf,
   348  					in_base,
   349  					cur_pos,
   350  					nice_len,
   351  					nice_len,
   352  					max_search_depth,
   353  					next_hashes,
   354  					&best_len,
   355  					NULL,
   356  					false);
   357  }
   358  
   359  #endif  // GO_SRC_GITHUB_COM_GRAILBIO_BASE_COMPRESS_LIBDEFLATE_BT_MATCHFINDER_H_