github.com/andybalholm/brotli@v1.0.6/metablock_distance.go (about)

     1  package brotli
     2  
     3  /* Copyright 2015 Google Inc. All Rights Reserved.
     4  
     5     Distributed under MIT license.
     6     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     7  */
     8  
/* Greedy block splitter for one block category (literal, command or distance).
   This instantiation works on histogramDistance histograms. Symbols are fed
   one at a time through blockSplitterAddSymbolDistance; every time the
   current block reaches target_block_size_ the splitter decides, in
   blockSplitterFinishBlockDistance, whether to start a new block type, reuse
   the type of the second last block, or merge into the last block. */
type blockSplitterDistance struct {
	/* Alphabet size handed to bitsEntropy together with the histogram data. */
	alphabet_size_     uint
	/* Lower bound applied to block_size_ when a block is finished; also the
	   initial value and the growth step of target_block_size_. */
	min_block_size_    uint
	/* Both entropy differences must exceed this value before a new block
	   type is created. */
	split_threshold_   float64
	/* Number of blocks written to split_ so far. */
	num_blocks_        uint
	/* Block split being filled in (types, lengths, num_types, num_blocks). */
	split_             *blockSplit
	/* Histogram storage; the entry at curr_histogram_ix_ is the one being
	   accumulated. */
	histograms_        []histogramDistance
	/* Caller-owned histogram count: set to the allocated count on init and
	   to split_.num_types when the final block is finished. */
	histograms_size_   *uint
	/* Value of block_size_ at which adding a symbol triggers finishing the
	   current block. */
	target_block_size_ uint
	/* Number of symbols added to the current block so far. */
	block_size_        uint
	/* Index into histograms_ of the histogram currently being filled. */
	curr_histogram_ix_ uint
	/* Histogram indices of the last ([0]) and second last ([1]) block types
	   the current block may be merged with. */
	last_histogram_ix_ [2]uint
	/* Entropies recorded for the histograms named by last_histogram_ix_. */
	last_entropy_      [2]float64
	/* Number of consecutive times the current block was merged into the
	   last block; reset whenever a block is emitted. */
	merge_last_count_  uint
}
    26  
    27  func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) {
    28  	var max_num_blocks uint = num_symbols/min_block_size + 1
    29  	var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
    30  	/* We have to allocate one more histogram than the maximum number of block
    31  	   types for the current histogram when the meta-block is too big. */
    32  	self.alphabet_size_ = alphabet_size
    33  
    34  	self.min_block_size_ = min_block_size
    35  	self.split_threshold_ = split_threshold
    36  	self.num_blocks_ = 0
    37  	self.split_ = split
    38  	self.histograms_size_ = histograms_size
    39  	self.target_block_size_ = min_block_size
    40  	self.block_size_ = 0
    41  	self.curr_histogram_ix_ = 0
    42  	self.merge_last_count_ = 0
    43  	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
    44  	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
    45  	self.split_.num_blocks = max_num_blocks
    46  	*histograms_size = max_num_types
    47  	if histograms == nil || cap(*histograms) < int(*histograms_size) {
    48  		*histograms = make([]histogramDistance, *histograms_size)
    49  	} else {
    50  		*histograms = (*histograms)[:*histograms_size]
    51  	}
    52  	self.histograms_ = *histograms
    53  
    54  	/* Clear only current histogram. */
    55  	histogramClearDistance(&self.histograms_[0])
    56  
    57  	self.last_histogram_ix_[1] = 0
    58  	self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
    59  }
    60  
    61  /* Does either of three things:
    62     (1) emits the current block with a new block type;
    63     (2) emits the current block with the type of the second last block;
    64     (3) merges the current block with the last block. */
    65  func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) {
    66  	var split *blockSplit = self.split_
    67  	var last_entropy []float64 = self.last_entropy_[:]
    68  	var histograms []histogramDistance = self.histograms_
    69  	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
    70  	if self.num_blocks_ == 0 {
    71  		/* Create first block. */
    72  		split.lengths[0] = uint32(self.block_size_)
    73  
    74  		split.types[0] = 0
    75  		last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
    76  		last_entropy[1] = last_entropy[0]
    77  		self.num_blocks_++
    78  		split.num_types++
    79  		self.curr_histogram_ix_++
    80  		if self.curr_histogram_ix_ < *self.histograms_size_ {
    81  			histogramClearDistance(&histograms[self.curr_histogram_ix_])
    82  		}
    83  		self.block_size_ = 0
    84  	} else if self.block_size_ > 0 {
    85  		var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
    86  		var combined_histo [2]histogramDistance
    87  		var combined_entropy [2]float64
    88  		var diff [2]float64
    89  		var j uint
    90  		for j = 0; j < 2; j++ {
    91  			var last_histogram_ix uint = self.last_histogram_ix_[j]
    92  			combined_histo[j] = histograms[self.curr_histogram_ix_]
    93  			histogramAddHistogramDistance(&combined_histo[j], &histograms[last_histogram_ix])
    94  			combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
    95  			diff[j] = combined_entropy[j] - entropy - last_entropy[j]
    96  		}
    97  
    98  		if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
    99  			/* Create new block. */
   100  			split.lengths[self.num_blocks_] = uint32(self.block_size_)
   101  
   102  			split.types[self.num_blocks_] = byte(split.num_types)
   103  			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
   104  			self.last_histogram_ix_[0] = uint(byte(split.num_types))
   105  			last_entropy[1] = last_entropy[0]
   106  			last_entropy[0] = entropy
   107  			self.num_blocks_++
   108  			split.num_types++
   109  			self.curr_histogram_ix_++
   110  			if self.curr_histogram_ix_ < *self.histograms_size_ {
   111  				histogramClearDistance(&histograms[self.curr_histogram_ix_])
   112  			}
   113  			self.block_size_ = 0
   114  			self.merge_last_count_ = 0
   115  			self.target_block_size_ = self.min_block_size_
   116  		} else if diff[1] < diff[0]-20.0 {
   117  			split.lengths[self.num_blocks_] = uint32(self.block_size_)
   118  			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
   119  			/* Combine this block with second last block. */
   120  
   121  			var tmp uint = self.last_histogram_ix_[0]
   122  			self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
   123  			self.last_histogram_ix_[1] = tmp
   124  			histograms[self.last_histogram_ix_[0]] = combined_histo[1]
   125  			last_entropy[1] = last_entropy[0]
   126  			last_entropy[0] = combined_entropy[1]
   127  			self.num_blocks_++
   128  			self.block_size_ = 0
   129  			histogramClearDistance(&histograms[self.curr_histogram_ix_])
   130  			self.merge_last_count_ = 0
   131  			self.target_block_size_ = self.min_block_size_
   132  		} else {
   133  			/* Combine this block with last block. */
   134  			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)
   135  
   136  			histograms[self.last_histogram_ix_[0]] = combined_histo[0]
   137  			last_entropy[0] = combined_entropy[0]
   138  			if split.num_types == 1 {
   139  				last_entropy[1] = last_entropy[0]
   140  			}
   141  
   142  			self.block_size_ = 0
   143  			histogramClearDistance(&histograms[self.curr_histogram_ix_])
   144  			self.merge_last_count_++
   145  			if self.merge_last_count_ > 1 {
   146  				self.target_block_size_ += self.min_block_size_
   147  			}
   148  		}
   149  	}
   150  
   151  	if is_final {
   152  		*self.histograms_size_ = split.num_types
   153  		split.num_blocks = self.num_blocks_
   154  	}
   155  }
   156  
   157  /* Adds the next symbol to the current histogram. When the current histogram
   158     reaches the target size, decides on merging the block. */
   159  func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) {
   160  	histogramAddDistance(&self.histograms_[self.curr_histogram_ix_], symbol)
   161  	self.block_size_++
   162  	if self.block_size_ == self.target_block_size_ {
   163  		blockSplitterFinishBlockDistance(self, false) /* is_final = */
   164  	}
   165  }