github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/compress/flate/huffman_code.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  import (
     8  	"math"
     9  	"sort"
    10  )
    11  
// huffmanEncoder holds a Huffman code as two parallel tables that are both
// indexed by symbol (literal) value.
type huffmanEncoder struct {
	// codeBits[sym] is the length in bits of the code for sym; generate
	// stores 0 for symbols whose frequency is zero.
	codeBits []uint8
	// code[sym] is the code value for sym, stored after being passed
	// through reverseBits.
	code []uint16
}
    16  
// literalNode pairs a symbol with the number of times it occurs.
type literalNode struct {
	// literal is the symbol value; it is used as an index into the
	// encoder's codeBits and code tables.
	literal uint16
	// freq is the symbol's frequency count.
	freq int32
}
    21  
// chain is one entry in the per-level lists that bitCounts builds while
// computing code lengths.
type chain struct {
	// The sum of the frequencies of the leaves in this tree.
	freq int32

	// The number of literals to the left of this item at this level.
	leafCount int32

	// The right child of this chain in the previous level.
	// bitCounts follows these links from the top level's last chain to
	// read off how many literals receive each code length.
	up *chain
}
    32  
// levelInfo is the working state bitCounts keeps for one level (one code
// length). Levels are linked to their neighbours through up and down.
type levelInfo struct {
	// Our level, kept for better printing.
	level int32

	// The most recent chain generated for this level.
	lastChain *chain

	// The frequency of the next character to add to this level.
	nextCharFreq int32

	// The frequency of the next pair (from level below) to add to this level.
	// Only valid if the "needed" value of the next lower level is 0.
	nextPairFreq int32

	// The number of chains remaining to generate for this level before moving
	// up to the next level.
	needed int32

	// The levelInfo for level+1 (nil for the top level).
	up *levelInfo

	// The levelInfo for level-1.
	down *levelInfo
}
    57  
    58  func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
    59  
    60  func newHuffmanEncoder(size int) *huffmanEncoder {
    61  	return &huffmanEncoder{make([]uint8, size), make([]uint16, size)}
    62  }
    63  
    64  // Generates a HuffmanCode corresponding to the fixed literal table
    65  func generateFixedLiteralEncoding() *huffmanEncoder {
    66  	h := newHuffmanEncoder(maxLit)
    67  	codeBits := h.codeBits
    68  	code := h.code
    69  	var ch uint16
    70  	for ch = 0; ch < maxLit; ch++ {
    71  		var bits uint16
    72  		var size uint8
    73  		switch {
    74  		case ch < 144:
    75  			// size 8, 000110000  .. 10111111
    76  			bits = ch + 48
    77  			size = 8
    78  			break
    79  		case ch < 256:
    80  			// size 9, 110010000 .. 111111111
    81  			bits = ch + 400 - 144
    82  			size = 9
    83  			break
    84  		case ch < 280:
    85  			// size 7, 0000000 .. 0010111
    86  			bits = ch - 256
    87  			size = 7
    88  			break
    89  		default:
    90  			// size 8, 11000000 .. 11000111
    91  			bits = ch + 192 - 280
    92  			size = 8
    93  		}
    94  		codeBits[ch] = size
    95  		code[ch] = reverseBits(bits, size)
    96  	}
    97  	return h
    98  }
    99  
   100  func generateFixedOffsetEncoding() *huffmanEncoder {
   101  	h := newHuffmanEncoder(30)
   102  	codeBits := h.codeBits
   103  	code := h.code
   104  	for ch := uint16(0); ch < 30; ch++ {
   105  		codeBits[ch] = 5
   106  		code[ch] = reverseBits(ch, 5)
   107  	}
   108  	return h
   109  }
   110  
   111  var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
   112  var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
   113  
   114  func (h *huffmanEncoder) bitLength(freq []int32) int64 {
   115  	var total int64
   116  	for i, f := range freq {
   117  		if f != 0 {
   118  			total += int64(f) * int64(h.codeBits[i])
   119  		}
   120  	}
   121  	return total
   122  }
   123  
// bitCounts returns the number of literals assigned to each bit size in the
// Huffman encoding.
//
// This method is only called when len(list) >= 3.
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list     The literals with non-zero frequencies and their associated
//          frequencies, in order of increasing frequency. The slice is
//          re-sliced one element past its length to hold a sentinel node
//          (see maxNode), so the caller must provide at least one element
//          of spare capacity.
// maxBits  The maximum number of bits that should be used to encode any literal.
// return   A slice in which element i indicates the number of literals
//          that should be encoded in i bits.
//
// NOTE(review): the level/chain bookkeeping looks like the boundary
// package-merge method for length-limited Huffman codes - confirm.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
	n := int32(len(list))
	// Extend the slice by one and store the sentinel there, so reads of
	// list[n].freq below yield MaxInt32 once the real leaves are used up.
	list = list[0 : n+1]
	list[n] = maxNode()

	// The tree can't have greater depth than n - 1, no matter what.  This
	// saves a little bit of work in some small cases.
	if maxBits > n-1 {
		maxBits = n - 1
	}

	// Create information about each of the levels.
	// A bogus "Level 0" whose sole purpose is so that
	// level1.down.needed==0.  This makes level1.nextPairFreq
	// be a legitimate value that never gets chosen.
	top := &levelInfo{needed: 0}
	// chain2 is shared as the initial lastChain of every level. Its up link
	// is a zero-valued chain (leafCount 0, up nil), which is where the final
	// walk over the chains stops.
	chain2 := &chain{list[1].freq, 2, new(chain)}
	for level := int32(1); level <= maxBits; level++ {
		// For every level, the first two items are the first two characters.
		// We initialize the levels as if we had already figured this out.
		top = &levelInfo{
			level:        level,
			lastChain:    chain2,
			nextCharFreq: list[2].freq,
			nextPairFreq: list[0].freq + list[1].freq,
			down:         top,
		}
		top.down.up = top
		if level == 1 {
			// Level 1 has no level below it to take pairs from.
			top.nextPairFreq = math.MaxInt32
		}
	}

	// We need a total of 2*n - 2 items at top level and have already generated 2.
	top.needed = 2*n - 4

	l := top
	for {
		if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
			// We've run out of both leaves and pairs.
			// End all calculations for this level.
			// To make sure we never come back to this level or any lower level,
			// set nextPairFreq of the level above impossibly large.
			l.lastChain = nil
			l.needed = 0
			l = l.up
			l.nextPairFreq = math.MaxInt32
			continue
		}

		// Remember the previous item's frequency; it is summed with the new
		// item below to form the next pair for the level above.
		prevFreq := l.lastChain.freq
		if l.nextCharFreq < l.nextPairFreq {
			// The next item on this row is a leaf node.
			// (This n shadows the outer n; it is the new leaf count.)
			n := l.lastChain.leafCount + 1
			l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up}
			l.nextCharFreq = list[n].freq
		} else {
			// The next item on this row is a pair from the previous row.
			// nextPairFreq isn't valid until we generate two
			// more values in the level below.
			l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain}
			l.down.needed = 2
		}

		if l.needed--; l.needed == 0 {
			// We've done everything we need to do for this level.
			// Continue calculating one level up.  Fill in nextPairFreq
			// of that level with the sum of the two nodes we've just calculated on
			// this level.
			up := l.up
			if up == nil {
				// All done!
				break
			}
			up.nextPairFreq = prevFreq + l.lastChain.freq
			l = up
		} else {
			// If we stole from below, move down temporarily to replenish it.
			for l.down.needed > 0 {
				l = l.down
			}
		}
	}

	// Something is wrong if at the end, the top level is nil or hasn't used
	// all of the leaves.
	if top.lastChain.leafCount != n {
		panic("top.lastChain.leafCount != n")
	}

	bitCount := make([]int32, maxBits+1)
	bits := 1
	// Note: the loop variable chain shadows the chain type inside this loop.
	for chain := top.lastChain; chain.up != nil; chain = chain.up {
		// chain.leafCount gives the number of literals requiring at least "bits"
		// bits to encode.
		bitCount[bits] = chain.leafCount - chain.up.leafCount
		bits++
	}
	return bitCount
}
   236  
   237  // Look at the leaves and assign them a bit count and an encoding as specified
   238  // in RFC 1951 3.2.2
   239  func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
   240  	code := uint16(0)
   241  	for n, bits := range bitCount {
   242  		code <<= 1
   243  		if n == 0 || bits == 0 {
   244  			continue
   245  		}
   246  		// The literals list[len(list)-bits] .. list[len(list)-bits]
   247  		// are encoded using "bits" bits, and get the values
   248  		// code, code + 1, ....  The code values are
   249  		// assigned in literal order (not frequency order).
   250  		chunk := list[len(list)-int(bits):]
   251  		sortByLiteral(chunk)
   252  		for _, node := range chunk {
   253  			h.codeBits[node.literal] = uint8(n)
   254  			h.code[node.literal] = reverseBits(code, uint8(n))
   255  			code++
   256  		}
   257  		list = list[0 : len(list)-int(bits)]
   258  	}
   259  }
   260  
   261  // Update this Huffman Code object to be the minimum code for the specified frequency count.
   262  //
   263  // freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
   264  // maxBits  The maximum number of bits to use for any literal.
   265  func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
   266  	list := make([]literalNode, len(freq)+1)
   267  	// Number of non-zero literals
   268  	count := 0
   269  	// Set list to be the set of all non-zero literals and their frequencies
   270  	for i, f := range freq {
   271  		if f != 0 {
   272  			list[count] = literalNode{uint16(i), f}
   273  			count++
   274  		} else {
   275  			h.codeBits[i] = 0
   276  		}
   277  	}
   278  	// If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
   279  	h.codeBits = h.codeBits[0:len(freq)]
   280  	list = list[0:count]
   281  	if count <= 2 {
   282  		// Handle the small cases here, because they are awkward for the general case code.  With
   283  		// two or fewer literals, everything has bit length 1.
   284  		for i, node := range list {
   285  			// "list" is in order of increasing literal value.
   286  			h.codeBits[node.literal] = 1
   287  			h.code[node.literal] = uint16(i)
   288  		}
   289  		return
   290  	}
   291  	sortByFreq(list)
   292  
   293  	// Get the number of literals for each bit count
   294  	bitCount := h.bitCounts(list, maxBits)
   295  	// And do the assignment
   296  	h.assignEncodingAndSize(bitCount, list)
   297  }
   298  
   299  type literalNodeSorter struct {
   300  	a    []literalNode
   301  	less func(i, j int) bool
   302  }
   303  
   304  func (s literalNodeSorter) Len() int { return len(s.a) }
   305  
   306  func (s literalNodeSorter) Less(i, j int) bool {
   307  	return s.less(i, j)
   308  }
   309  
   310  func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
   311  
   312  func sortByFreq(a []literalNode) {
   313  	s := &literalNodeSorter{a, func(i, j int) bool {
   314  		if a[i].freq == a[j].freq {
   315  			return a[i].literal < a[j].literal
   316  		}
   317  		return a[i].freq < a[j].freq
   318  	}}
   319  	sort.Sort(s)
   320  }
   321  
   322  func sortByLiteral(a []literalNode) {
   323  	s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
   324  	sort.Sort(s)
   325  }