github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/compress/flate/huffman_code.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package flate 6 7 import ( 8 "math" 9 "sort" 10 ) 11 12 type huffmanEncoder struct { 13 codeBits []uint8 14 code []uint16 15 } 16 17 type literalNode struct { 18 literal uint16 19 freq int32 20 } 21 22 type chain struct { 23 // The sum of the leaves in this tree 24 freq int32 25 26 // The number of literals to the left of this item at this level 27 leafCount int32 28 29 // The right child of this chain in the previous level. 30 up *chain 31 } 32 33 type levelInfo struct { 34 // Our level. for better printing 35 level int32 36 37 // The most recent chain generated for this level 38 lastChain *chain 39 40 // The frequency of the next character to add to this level 41 nextCharFreq int32 42 43 // The frequency of the next pair (from level below) to add to this level. 44 // Only valid if the "needed" value of the next lower level is 0. 45 nextPairFreq int32 46 47 // The number of chains remaining to generate for this level before moving 48 // up to the next level 49 needed int32 50 51 // The levelInfo for level+1 52 up *levelInfo 53 54 // The levelInfo for level-1 55 down *levelInfo 56 } 57 58 func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } 59 60 func newHuffmanEncoder(size int) *huffmanEncoder { 61 return &huffmanEncoder{make([]uint8, size), make([]uint16, size)} 62 } 63 64 // Generates a HuffmanCode corresponding to the fixed literal table 65 func generateFixedLiteralEncoding() *huffmanEncoder { 66 h := newHuffmanEncoder(maxLit) 67 codeBits := h.codeBits 68 code := h.code 69 var ch uint16 70 for ch = 0; ch < maxLit; ch++ { 71 var bits uint16 72 var size uint8 73 switch { 74 case ch < 144: 75 // size 8, 000110000 .. 10111111 76 bits = ch + 48 77 size = 8 78 break 79 case ch < 256: 80 // size 9, 110010000 .. 111111111 81 bits = ch + 400 - 144 82 size = 9 83 break 84 case ch < 280: 85 // size 7, 0000000 .. 0010111 86 bits = ch - 256 87 size = 7 88 break 89 default: 90 // size 8, 11000000 .. 11000111 91 bits = ch + 192 - 280 92 size = 8 93 } 94 codeBits[ch] = size 95 code[ch] = reverseBits(bits, size) 96 } 97 return h 98 } 99 100 func generateFixedOffsetEncoding() *huffmanEncoder { 101 h := newHuffmanEncoder(30) 102 codeBits := h.codeBits 103 code := h.code 104 for ch := uint16(0); ch < 30; ch++ { 105 codeBits[ch] = 5 106 code[ch] = reverseBits(ch, 5) 107 } 108 return h 109 } 110 111 var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() 112 var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() 113 114 func (h *huffmanEncoder) bitLength(freq []int32) int64 { 115 var total int64 116 for i, f := range freq { 117 if f != 0 { 118 total += int64(f) * int64(h.codeBits[i]) 119 } 120 } 121 return total 122 } 123 124 // Return the number of literals assigned to each bit size in the Huffman encoding 125 // 126 // This method is only called when list.length >= 3 127 // The cases of 0, 1, and 2 literals are handled by special case code. 128 // 129 // list An array of the literals with non-zero frequencies 130 // and their associated frequencies. The array is in order of increasing 131 // frequency, and has as its last element a special element with frequency 132 // MaxInt32 133 // maxBits The maximum number of bits that should be used to encode any literal. 134 // return An integer array in which array[i] indicates the number of literals 135 // that should be encoded in i bits. 136 func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { 137 n := int32(len(list)) 138 list = list[0 : n+1] 139 list[n] = maxNode() 140 141 // The tree can't have greater depth than n - 1, no matter what. This 142 // saves a little bit of work in some small cases 143 if maxBits > n-1 { 144 maxBits = n - 1 145 } 146 147 // Create information about each of the levels. 148 // A bogus "Level 0" whose sole purpose is so that 149 // level1.prev.needed==0. This makes level1.nextPairFreq 150 // be a legitimate value that never gets chosen. 151 top := &levelInfo{needed: 0} 152 chain2 := &chain{list[1].freq, 2, new(chain)} 153 for level := int32(1); level <= maxBits; level++ { 154 // For every level, the first two items are the first two characters. 155 // We initialize the levels as if we had already figured this out. 156 top = &levelInfo{ 157 level: level, 158 lastChain: chain2, 159 nextCharFreq: list[2].freq, 160 nextPairFreq: list[0].freq + list[1].freq, 161 down: top, 162 } 163 top.down.up = top 164 if level == 1 { 165 top.nextPairFreq = math.MaxInt32 166 } 167 } 168 169 // We need a total of 2*n - 2 items at top level and have already generated 2. 170 top.needed = 2*n - 4 171 172 l := top 173 for { 174 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { 175 // We've run out of both leafs and pairs. 176 // End all calculations for this level. 177 // To m sure we never come back to this level or any lower level, 178 // set nextPairFreq impossibly large. 179 l.lastChain = nil 180 l.needed = 0 181 l = l.up 182 l.nextPairFreq = math.MaxInt32 183 continue 184 } 185 186 prevFreq := l.lastChain.freq 187 if l.nextCharFreq < l.nextPairFreq { 188 // The next item on this row is a leaf node. 189 n := l.lastChain.leafCount + 1 190 l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up} 191 l.nextCharFreq = list[n].freq 192 } else { 193 // The next item on this row is a pair from the previous row. 194 // nextPairFreq isn't valid until we generate two 195 // more values in the level below 196 l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain} 197 l.down.needed = 2 198 } 199 200 if l.needed--; l.needed == 0 { 201 // We've done everything we need to do for this level. 202 // Continue calculating one level up. Fill in nextPairFreq 203 // of that level with the sum of the two nodes we've just calculated on 204 // this level. 205 up := l.up 206 if up == nil { 207 // All done! 208 break 209 } 210 up.nextPairFreq = prevFreq + l.lastChain.freq 211 l = up 212 } else { 213 // If we stole from below, move down temporarily to replenish it. 214 for l.down.needed > 0 { 215 l = l.down 216 } 217 } 218 } 219 220 // Somethings is wrong if at the end, the top level is null or hasn't used 221 // all of the leaves. 222 if top.lastChain.leafCount != n { 223 panic("top.lastChain.leafCount != n") 224 } 225 226 bitCount := make([]int32, maxBits+1) 227 bits := 1 228 for chain := top.lastChain; chain.up != nil; chain = chain.up { 229 // chain.leafCount gives the number of literals requiring at least "bits" 230 // bits to encode. 231 bitCount[bits] = chain.leafCount - chain.up.leafCount 232 bits++ 233 } 234 return bitCount 235 } 236 237 // Look at the leaves and assign them a bit count and an encoding as specified 238 // in RFC 1951 3.2.2 239 func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { 240 code := uint16(0) 241 for n, bits := range bitCount { 242 code <<= 1 243 if n == 0 || bits == 0 { 244 continue 245 } 246 // The literals list[len(list)-bits] .. list[len(list)-bits] 247 // are encoded using "bits" bits, and get the values 248 // code, code + 1, .... The code values are 249 // assigned in literal order (not frequency order). 250 chunk := list[len(list)-int(bits):] 251 sortByLiteral(chunk) 252 for _, node := range chunk { 253 h.codeBits[node.literal] = uint8(n) 254 h.code[node.literal] = reverseBits(code, uint8(n)) 255 code++ 256 } 257 list = list[0 : len(list)-int(bits)] 258 } 259 } 260 261 // Update this Huffman Code object to be the minimum code for the specified frequency count. 262 // 263 // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. 264 // maxBits The maximum number of bits to use for any literal. 265 func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { 266 list := make([]literalNode, len(freq)+1) 267 // Number of non-zero literals 268 count := 0 269 // Set list to be the set of all non-zero literals and their frequencies 270 for i, f := range freq { 271 if f != 0 { 272 list[count] = literalNode{uint16(i), f} 273 count++ 274 } else { 275 h.codeBits[i] = 0 276 } 277 } 278 // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros 279 h.codeBits = h.codeBits[0:len(freq)] 280 list = list[0:count] 281 if count <= 2 { 282 // Handle the small cases here, because they are awkward for the general case code. With 283 // two or fewer literals, everything has bit length 1. 284 for i, node := range list { 285 // "list" is in order of increasing literal value. 286 h.codeBits[node.literal] = 1 287 h.code[node.literal] = uint16(i) 288 } 289 return 290 } 291 sortByFreq(list) 292 293 // Get the number of literals for each bit count 294 bitCount := h.bitCounts(list, maxBits) 295 // And do the assignment 296 h.assignEncodingAndSize(bitCount, list) 297 } 298 299 type literalNodeSorter struct { 300 a []literalNode 301 less func(i, j int) bool 302 } 303 304 func (s literalNodeSorter) Len() int { return len(s.a) } 305 306 func (s literalNodeSorter) Less(i, j int) bool { 307 return s.less(i, j) 308 } 309 310 func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } 311 312 func sortByFreq(a []literalNode) { 313 s := &literalNodeSorter{a, func(i, j int) bool { 314 if a[i].freq == a[j].freq { 315 return a[i].literal < a[j].literal 316 } 317 return a[i].freq < a[j].freq 318 }} 319 sort.Sort(s) 320 } 321 322 func sortByLiteral(a []literalNode) { 323 s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }} 324 sort.Sort(s) 325 }