github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/compress/flate/huffman_code.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package flate 6 7 import ( 8 "math" 9 "sort" 10 ) 11 12 type huffmanEncoder struct { 13 codeBits []uint8 14 code []uint16 15 } 16 17 type literalNode struct { 18 literal uint16 19 freq int32 20 } 21 22 // A levelInfo describes the state of the constructed tree for a given depth. 23 type levelInfo struct { 24 // Our level. for better printing 25 level int32 26 27 // The frequency of the last node at this level 28 lastFreq int32 29 30 // The frequency of the next character to add to this level 31 nextCharFreq int32 32 33 // The frequency of the next pair (from level below) to add to this level. 34 // Only valid if the "needed" value of the next lower level is 0. 35 nextPairFreq int32 36 37 // The number of chains remaining to generate for this level before moving 38 // up to the next level 39 needed int32 40 } 41 42 func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } 43 44 func newHuffmanEncoder(size int) *huffmanEncoder { 45 return &huffmanEncoder{make([]uint8, size), make([]uint16, size)} 46 } 47 48 // Generates a HuffmanCode corresponding to the fixed literal table 49 func generateFixedLiteralEncoding() *huffmanEncoder { 50 h := newHuffmanEncoder(maxNumLit) 51 codeBits := h.codeBits 52 code := h.code 53 var ch uint16 54 for ch = 0; ch < maxNumLit; ch++ { 55 var bits uint16 56 var size uint8 57 switch { 58 case ch < 144: 59 // size 8, 000110000 .. 10111111 60 bits = ch + 48 61 size = 8 62 break 63 case ch < 256: 64 // size 9, 110010000 .. 111111111 65 bits = ch + 400 - 144 66 size = 9 67 break 68 case ch < 280: 69 // size 7, 0000000 .. 0010111 70 bits = ch - 256 71 size = 7 72 break 73 default: 74 // size 8, 11000000 .. 11000111 75 bits = ch + 192 - 280 76 size = 8 77 } 78 codeBits[ch] = size 79 code[ch] = reverseBits(bits, size) 80 } 81 return h 82 } 83 84 func generateFixedOffsetEncoding() *huffmanEncoder { 85 h := newHuffmanEncoder(30) 86 codeBits := h.codeBits 87 code := h.code 88 for ch := uint16(0); ch < 30; ch++ { 89 codeBits[ch] = 5 90 code[ch] = reverseBits(ch, 5) 91 } 92 return h 93 } 94 95 var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() 96 var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() 97 98 func (h *huffmanEncoder) bitLength(freq []int32) int64 { 99 var total int64 100 for i, f := range freq { 101 if f != 0 { 102 total += int64(f) * int64(h.codeBits[i]) 103 } 104 } 105 return total 106 } 107 108 const maxBitsLimit = 16 109 110 // Return the number of literals assigned to each bit size in the Huffman encoding 111 // 112 // This method is only called when list.length >= 3 113 // The cases of 0, 1, and 2 literals are handled by special case code. 114 // 115 // list An array of the literals with non-zero frequencies 116 // and their associated frequencies. The array is in order of increasing 117 // frequency, and has as its last element a special element with frequency 118 // MaxInt32 119 // maxBits The maximum number of bits that should be used to encode any literal. 120 // Must be less than 16. 121 // return An integer array in which array[i] indicates the number of literals 122 // that should be encoded in i bits. 123 func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { 124 if maxBits >= maxBitsLimit { 125 panic("flate: maxBits too large") 126 } 127 n := int32(len(list)) 128 list = list[0 : n+1] 129 list[n] = maxNode() 130 131 // The tree can't have greater depth than n - 1, no matter what. This 132 // saves a little bit of work in some small cases 133 if maxBits > n-1 { 134 maxBits = n - 1 135 } 136 137 // Create information about each of the levels. 138 // A bogus "Level 0" whose sole purpose is so that 139 // level1.prev.needed==0. This makes level1.nextPairFreq 140 // be a legitimate value that never gets chosen. 141 var levels [maxBitsLimit]levelInfo 142 // leafCounts[i] counts the number of literals at the left 143 // of ancestors of the rightmost node at level i. 144 // leafCounts[i][j] is the number of literals at the left 145 // of the level j ancestor. 146 var leafCounts [maxBitsLimit][maxBitsLimit]int32 147 148 for level := int32(1); level <= maxBits; level++ { 149 // For every level, the first two items are the first two characters. 150 // We initialize the levels as if we had already figured this out. 151 levels[level] = levelInfo{ 152 level: level, 153 lastFreq: list[1].freq, 154 nextCharFreq: list[2].freq, 155 nextPairFreq: list[0].freq + list[1].freq, 156 } 157 leafCounts[level][level] = 2 158 if level == 1 { 159 levels[level].nextPairFreq = math.MaxInt32 160 } 161 } 162 163 // We need a total of 2*n - 2 items at top level and have already generated 2. 164 levels[maxBits].needed = 2*n - 4 165 166 level := maxBits 167 for { 168 l := &levels[level] 169 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { 170 // We've run out of both leafs and pairs. 171 // End all calculations for this level. 172 // To make sure we never come back to this level or any lower level, 173 // set nextPairFreq impossibly large. 174 l.needed = 0 175 levels[level+1].nextPairFreq = math.MaxInt32 176 level++ 177 continue 178 } 179 180 prevFreq := l.lastFreq 181 if l.nextCharFreq < l.nextPairFreq { 182 // The next item on this row is a leaf node. 183 n := leafCounts[level][level] + 1 184 l.lastFreq = l.nextCharFreq 185 // Lower leafCounts are the same of the previous node. 186 leafCounts[level][level] = n 187 l.nextCharFreq = list[n].freq 188 } else { 189 // The next item on this row is a pair from the previous row. 190 // nextPairFreq isn't valid until we generate two 191 // more values in the level below 192 l.lastFreq = l.nextPairFreq 193 // Take leaf counts from the lower level, except counts[level] remains the same. 194 copy(leafCounts[level][:level], leafCounts[level-1][:level]) 195 levels[l.level-1].needed = 2 196 } 197 198 if l.needed--; l.needed == 0 { 199 // We've done everything we need to do for this level. 200 // Continue calculating one level up. Fill in nextPairFreq 201 // of that level with the sum of the two nodes we've just calculated on 202 // this level. 203 if l.level == maxBits { 204 // All done! 205 break 206 } 207 levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq 208 level++ 209 } else { 210 // If we stole from below, move down temporarily to replenish it. 211 for levels[level-1].needed > 0 { 212 level-- 213 } 214 } 215 } 216 217 // Somethings is wrong if at the end, the top level is null or hasn't used 218 // all of the leaves. 219 if leafCounts[maxBits][maxBits] != n { 220 panic("leafCounts[maxBits][maxBits] != n") 221 } 222 223 bitCount := make([]int32, maxBits+1) 224 bits := 1 225 counts := &leafCounts[maxBits] 226 for level := maxBits; level > 0; level-- { 227 // chain.leafCount gives the number of literals requiring at least "bits" 228 // bits to encode. 229 bitCount[bits] = counts[level] - counts[level-1] 230 bits++ 231 } 232 return bitCount 233 } 234 235 // Look at the leaves and assign them a bit count and an encoding as specified 236 // in RFC 1951 3.2.2 237 func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { 238 code := uint16(0) 239 for n, bits := range bitCount { 240 code <<= 1 241 if n == 0 || bits == 0 { 242 continue 243 } 244 // The literals list[len(list)-bits] .. list[len(list)-bits] 245 // are encoded using "bits" bits, and get the values 246 // code, code + 1, .... The code values are 247 // assigned in literal order (not frequency order). 248 chunk := list[len(list)-int(bits):] 249 sortByLiteral(chunk) 250 for _, node := range chunk { 251 h.codeBits[node.literal] = uint8(n) 252 h.code[node.literal] = reverseBits(code, uint8(n)) 253 code++ 254 } 255 list = list[0 : len(list)-int(bits)] 256 } 257 } 258 259 // Update this Huffman Code object to be the minimum code for the specified frequency count. 260 // 261 // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. 262 // maxBits The maximum number of bits to use for any literal. 263 func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { 264 list := make([]literalNode, len(freq)+1) 265 // Number of non-zero literals 266 count := 0 267 // Set list to be the set of all non-zero literals and their frequencies 268 for i, f := range freq { 269 if f != 0 { 270 list[count] = literalNode{uint16(i), f} 271 count++ 272 } else { 273 h.codeBits[i] = 0 274 } 275 } 276 // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros 277 h.codeBits = h.codeBits[0:len(freq)] 278 list = list[0:count] 279 if count <= 2 { 280 // Handle the small cases here, because they are awkward for the general case code. With 281 // two or fewer literals, everything has bit length 1. 282 for i, node := range list { 283 // "list" is in order of increasing literal value. 284 h.codeBits[node.literal] = 1 285 h.code[node.literal] = uint16(i) 286 } 287 return 288 } 289 sortByFreq(list) 290 291 // Get the number of literals for each bit count 292 bitCount := h.bitCounts(list, maxBits) 293 // And do the assignment 294 h.assignEncodingAndSize(bitCount, list) 295 } 296 297 type literalNodeSorter struct { 298 a []literalNode 299 less func(i, j int) bool 300 } 301 302 func (s literalNodeSorter) Len() int { return len(s.a) } 303 304 func (s literalNodeSorter) Less(i, j int) bool { 305 return s.less(i, j) 306 } 307 308 func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } 309 310 func sortByFreq(a []literalNode) { 311 s := &literalNodeSorter{a, func(i, j int) bool { 312 if a[i].freq == a[j].freq { 313 return a[i].literal < a[j].literal 314 } 315 return a[i].freq < a[j].freq 316 }} 317 sort.Sort(s) 318 } 319 320 func sortByLiteral(a []literalNode) { 321 s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }} 322 sort.Sort(s) 323 }