github.com/andybalholm/brotli@v1.0.6/metablock_distance.go (about) 1 package brotli 2 3 /* Copyright 2015 Google Inc. All Rights Reserved. 4 5 Distributed under MIT license. 6 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 7 */ 8 9 /* Greedy block splitter for one block category (literal, command or distance). 10 */ 11 type blockSplitterDistance struct { 12 alphabet_size_ uint 13 min_block_size_ uint 14 split_threshold_ float64 15 num_blocks_ uint 16 split_ *blockSplit 17 histograms_ []histogramDistance 18 histograms_size_ *uint 19 target_block_size_ uint 20 block_size_ uint 21 curr_histogram_ix_ uint 22 last_histogram_ix_ [2]uint 23 last_entropy_ [2]float64 24 merge_last_count_ uint 25 } 26 27 func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) { 28 var max_num_blocks uint = num_symbols/min_block_size + 1 29 var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1) 30 /* We have to allocate one more histogram than the maximum number of block 31 types for the current histogram when the meta-block is too big. */ 32 self.alphabet_size_ = alphabet_size 33 34 self.min_block_size_ = min_block_size 35 self.split_threshold_ = split_threshold 36 self.num_blocks_ = 0 37 self.split_ = split 38 self.histograms_size_ = histograms_size 39 self.target_block_size_ = min_block_size 40 self.block_size_ = 0 41 self.curr_histogram_ix_ = 0 42 self.merge_last_count_ = 0 43 brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks) 44 brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks) 45 self.split_.num_blocks = max_num_blocks 46 *histograms_size = max_num_types 47 if histograms == nil || cap(*histograms) < int(*histograms_size) { 48 *histograms = make([]histogramDistance, *histograms_size) 49 } else { 50 *histograms = (*histograms)[:*histograms_size] 51 } 52 self.histograms_ = *histograms 53 54 /* Clear only current histogram. */ 55 histogramClearDistance(&self.histograms_[0]) 56 57 self.last_histogram_ix_[1] = 0 58 self.last_histogram_ix_[0] = self.last_histogram_ix_[1] 59 } 60 61 /* Does either of three things: 62 (1) emits the current block with a new block type; 63 (2) emits the current block with the type of the second last block; 64 (3) merges the current block with the last block. */ 65 func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) { 66 var split *blockSplit = self.split_ 67 var last_entropy []float64 = self.last_entropy_[:] 68 var histograms []histogramDistance = self.histograms_ 69 self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_) 70 if self.num_blocks_ == 0 { 71 /* Create first block. */ 72 split.lengths[0] = uint32(self.block_size_) 73 74 split.types[0] = 0 75 last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_) 76 last_entropy[1] = last_entropy[0] 77 self.num_blocks_++ 78 split.num_types++ 79 self.curr_histogram_ix_++ 80 if self.curr_histogram_ix_ < *self.histograms_size_ { 81 histogramClearDistance(&histograms[self.curr_histogram_ix_]) 82 } 83 self.block_size_ = 0 84 } else if self.block_size_ > 0 { 85 var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_) 86 var combined_histo [2]histogramDistance 87 var combined_entropy [2]float64 88 var diff [2]float64 89 var j uint 90 for j = 0; j < 2; j++ { 91 var last_histogram_ix uint = self.last_histogram_ix_[j] 92 combined_histo[j] = histograms[self.curr_histogram_ix_] 93 histogramAddHistogramDistance(&combined_histo[j], &histograms[last_histogram_ix]) 94 combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_) 95 diff[j] = combined_entropy[j] - entropy - last_entropy[j] 96 } 97 98 if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ { 99 /* Create new block. */ 100 split.lengths[self.num_blocks_] = uint32(self.block_size_) 101 102 split.types[self.num_blocks_] = byte(split.num_types) 103 self.last_histogram_ix_[1] = self.last_histogram_ix_[0] 104 self.last_histogram_ix_[0] = uint(byte(split.num_types)) 105 last_entropy[1] = last_entropy[0] 106 last_entropy[0] = entropy 107 self.num_blocks_++ 108 split.num_types++ 109 self.curr_histogram_ix_++ 110 if self.curr_histogram_ix_ < *self.histograms_size_ { 111 histogramClearDistance(&histograms[self.curr_histogram_ix_]) 112 } 113 self.block_size_ = 0 114 self.merge_last_count_ = 0 115 self.target_block_size_ = self.min_block_size_ 116 } else if diff[1] < diff[0]-20.0 { 117 split.lengths[self.num_blocks_] = uint32(self.block_size_) 118 split.types[self.num_blocks_] = split.types[self.num_blocks_-2] 119 /* Combine this block with second last block. */ 120 121 var tmp uint = self.last_histogram_ix_[0] 122 self.last_histogram_ix_[0] = self.last_histogram_ix_[1] 123 self.last_histogram_ix_[1] = tmp 124 histograms[self.last_histogram_ix_[0]] = combined_histo[1] 125 last_entropy[1] = last_entropy[0] 126 last_entropy[0] = combined_entropy[1] 127 self.num_blocks_++ 128 self.block_size_ = 0 129 histogramClearDistance(&histograms[self.curr_histogram_ix_]) 130 self.merge_last_count_ = 0 131 self.target_block_size_ = self.min_block_size_ 132 } else { 133 /* Combine this block with last block. */ 134 split.lengths[self.num_blocks_-1] += uint32(self.block_size_) 135 136 histograms[self.last_histogram_ix_[0]] = combined_histo[0] 137 last_entropy[0] = combined_entropy[0] 138 if split.num_types == 1 { 139 last_entropy[1] = last_entropy[0] 140 } 141 142 self.block_size_ = 0 143 histogramClearDistance(&histograms[self.curr_histogram_ix_]) 144 self.merge_last_count_++ 145 if self.merge_last_count_ > 1 { 146 self.target_block_size_ += self.min_block_size_ 147 } 148 } 149 } 150 151 if is_final { 152 *self.histograms_size_ = split.num_types 153 split.num_blocks = self.num_blocks_ 154 } 155 } 156 157 /* Adds the next symbol to the current histogram. When the current histogram 158 reaches the target size, decides on merging the block. */ 159 func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) { 160 histogramAddDistance(&self.histograms_[self.curr_histogram_ix_], symbol) 161 self.block_size_++ 162 if self.block_size_ == self.target_block_size_ { 163 blockSplitterFinishBlockDistance(self, false) /* is_final = */ 164 } 165 }