github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/compress/flate/huffman_code.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  import (
     8  	"math"
     9  	"sort"
    10  )
    11  
// A huffmanEncoder holds a Huffman code as two parallel tables indexed by
// symbol (literal) value.
type huffmanEncoder struct {
	// codeBits[i] is the length in bits of the code for symbol i.
	// generate sets it to 0 for symbols whose frequency is zero.
	codeBits []uint8
	// code[i] is the code value for symbol i, stored in the form returned
	// by reverseBits.
	code     []uint16
}
    16  
// A literalNode pairs a symbol with the number of times it occurs.
type literalNode struct {
	// literal is the symbol value; it is used as an index into the
	// encoder's codeBits and code tables.
	literal uint16
	// freq is the frequency count of the symbol.
	freq    int32
}
    21  
// A levelInfo describes the state of the constructed tree for a given depth.
// bitCounts keeps one levelInfo per level, indexed by level number.
type levelInfo struct {
	// The level this entry describes. Kept for easier printing; bitCounts
	// also uses it to locate the neighbouring levels (level-1 and level+1).
	level int32

	// The frequency of the last node at this level
	lastFreq int32

	// The frequency of the next character to add to this level
	nextCharFreq int32

	// The frequency of the next pair (from level below) to add to this level.
	// Only valid if the "needed" value of the next lower level is 0.
	nextPairFreq int32

	// The number of chains remaining to generate for this level before moving
	// up to the next level
	needed int32
}
    41  
    42  func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
    43  
    44  func newHuffmanEncoder(size int) *huffmanEncoder {
    45  	return &huffmanEncoder{make([]uint8, size), make([]uint16, size)}
    46  }
    47  
    48  // Generates a HuffmanCode corresponding to the fixed literal table
    49  func generateFixedLiteralEncoding() *huffmanEncoder {
    50  	h := newHuffmanEncoder(maxNumLit)
    51  	codeBits := h.codeBits
    52  	code := h.code
    53  	var ch uint16
    54  	for ch = 0; ch < maxNumLit; ch++ {
    55  		var bits uint16
    56  		var size uint8
    57  		switch {
    58  		case ch < 144:
    59  			// size 8, 000110000  .. 10111111
    60  			bits = ch + 48
    61  			size = 8
    62  			break
    63  		case ch < 256:
    64  			// size 9, 110010000 .. 111111111
    65  			bits = ch + 400 - 144
    66  			size = 9
    67  			break
    68  		case ch < 280:
    69  			// size 7, 0000000 .. 0010111
    70  			bits = ch - 256
    71  			size = 7
    72  			break
    73  		default:
    74  			// size 8, 11000000 .. 11000111
    75  			bits = ch + 192 - 280
    76  			size = 8
    77  		}
    78  		codeBits[ch] = size
    79  		code[ch] = reverseBits(bits, size)
    80  	}
    81  	return h
    82  }
    83  
    84  func generateFixedOffsetEncoding() *huffmanEncoder {
    85  	h := newHuffmanEncoder(30)
    86  	codeBits := h.codeBits
    87  	code := h.code
    88  	for ch := uint16(0); ch < 30; ch++ {
    89  		codeBits[ch] = 5
    90  		code[ch] = reverseBits(ch, 5)
    91  	}
    92  	return h
    93  }
    94  
    95  var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
    96  var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
    97  
    98  func (h *huffmanEncoder) bitLength(freq []int32) int64 {
    99  	var total int64
   100  	for i, f := range freq {
   101  		if f != 0 {
   102  			total += int64(f) * int64(h.codeBits[i])
   103  		}
   104  	}
   105  	return total
   106  }
   107  
// maxBitsLimit is the exclusive upper bound on the maxBits argument accepted
// by bitCounts, and the dimension of its fixed-size per-level work arrays.
const maxBitsLimit = 16
   109  
   110  // Return the number of literals assigned to each bit size in the Huffman encoding
   111  //
   112  // This method is only called when list.length >= 3
   113  // The cases of 0, 1, and 2 literals are handled by special case code.
   114  //
   115  // list  An array of the literals with non-zero frequencies
   116  //             and their associated frequencies.  The array is in order of increasing
   117  //             frequency, and has as its last element a special element with frequency
   118  //             MaxInt32
   119  // maxBits     The maximum number of bits that should be used to encode any literal.
   120  //             Must be less than 16.
   121  // return      An integer array in which array[i] indicates the number of literals
   122  //             that should be encoded in i bits.
   123  func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
   124  	if maxBits >= maxBitsLimit {
   125  		panic("flate: maxBits too large")
   126  	}
   127  	n := int32(len(list))
   128  	list = list[0 : n+1]
   129  	list[n] = maxNode()
   130  
   131  	// The tree can't have greater depth than n - 1, no matter what.  This
   132  	// saves a little bit of work in some small cases
   133  	if maxBits > n-1 {
   134  		maxBits = n - 1
   135  	}
   136  
   137  	// Create information about each of the levels.
   138  	// A bogus "Level 0" whose sole purpose is so that
   139  	// level1.prev.needed==0.  This makes level1.nextPairFreq
   140  	// be a legitimate value that never gets chosen.
   141  	var levels [maxBitsLimit]levelInfo
   142  	// leafCounts[i] counts the number of literals at the left
   143  	// of ancestors of the rightmost node at level i.
   144  	// leafCounts[i][j] is the number of literals at the left
   145  	// of the level j ancestor.
   146  	var leafCounts [maxBitsLimit][maxBitsLimit]int32
   147  
   148  	for level := int32(1); level <= maxBits; level++ {
   149  		// For every level, the first two items are the first two characters.
   150  		// We initialize the levels as if we had already figured this out.
   151  		levels[level] = levelInfo{
   152  			level:        level,
   153  			lastFreq:     list[1].freq,
   154  			nextCharFreq: list[2].freq,
   155  			nextPairFreq: list[0].freq + list[1].freq,
   156  		}
   157  		leafCounts[level][level] = 2
   158  		if level == 1 {
   159  			levels[level].nextPairFreq = math.MaxInt32
   160  		}
   161  	}
   162  
   163  	// We need a total of 2*n - 2 items at top level and have already generated 2.
   164  	levels[maxBits].needed = 2*n - 4
   165  
   166  	level := maxBits
   167  	for {
   168  		l := &levels[level]
   169  		if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
   170  			// We've run out of both leafs and pairs.
   171  			// End all calculations for this level.
   172  			// To make sure we never come back to this level or any lower level,
   173  			// set nextPairFreq impossibly large.
   174  			l.needed = 0
   175  			levels[level+1].nextPairFreq = math.MaxInt32
   176  			level++
   177  			continue
   178  		}
   179  
   180  		prevFreq := l.lastFreq
   181  		if l.nextCharFreq < l.nextPairFreq {
   182  			// The next item on this row is a leaf node.
   183  			n := leafCounts[level][level] + 1
   184  			l.lastFreq = l.nextCharFreq
   185  			// Lower leafCounts are the same of the previous node.
   186  			leafCounts[level][level] = n
   187  			l.nextCharFreq = list[n].freq
   188  		} else {
   189  			// The next item on this row is a pair from the previous row.
   190  			// nextPairFreq isn't valid until we generate two
   191  			// more values in the level below
   192  			l.lastFreq = l.nextPairFreq
   193  			// Take leaf counts from the lower level, except counts[level] remains the same.
   194  			copy(leafCounts[level][:level], leafCounts[level-1][:level])
   195  			levels[l.level-1].needed = 2
   196  		}
   197  
   198  		if l.needed--; l.needed == 0 {
   199  			// We've done everything we need to do for this level.
   200  			// Continue calculating one level up.  Fill in nextPairFreq
   201  			// of that level with the sum of the two nodes we've just calculated on
   202  			// this level.
   203  			if l.level == maxBits {
   204  				// All done!
   205  				break
   206  			}
   207  			levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
   208  			level++
   209  		} else {
   210  			// If we stole from below, move down temporarily to replenish it.
   211  			for levels[level-1].needed > 0 {
   212  				level--
   213  			}
   214  		}
   215  	}
   216  
   217  	// Somethings is wrong if at the end, the top level is null or hasn't used
   218  	// all of the leaves.
   219  	if leafCounts[maxBits][maxBits] != n {
   220  		panic("leafCounts[maxBits][maxBits] != n")
   221  	}
   222  
   223  	bitCount := make([]int32, maxBits+1)
   224  	bits := 1
   225  	counts := &leafCounts[maxBits]
   226  	for level := maxBits; level > 0; level-- {
   227  		// chain.leafCount gives the number of literals requiring at least "bits"
   228  		// bits to encode.
   229  		bitCount[bits] = counts[level] - counts[level-1]
   230  		bits++
   231  	}
   232  	return bitCount
   233  }
   234  
   235  // Look at the leaves and assign them a bit count and an encoding as specified
   236  // in RFC 1951 3.2.2
   237  func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
   238  	code := uint16(0)
   239  	for n, bits := range bitCount {
   240  		code <<= 1
   241  		if n == 0 || bits == 0 {
   242  			continue
   243  		}
   244  		// The literals list[len(list)-bits] .. list[len(list)-bits]
   245  		// are encoded using "bits" bits, and get the values
   246  		// code, code + 1, ....  The code values are
   247  		// assigned in literal order (not frequency order).
   248  		chunk := list[len(list)-int(bits):]
   249  		sortByLiteral(chunk)
   250  		for _, node := range chunk {
   251  			h.codeBits[node.literal] = uint8(n)
   252  			h.code[node.literal] = reverseBits(code, uint8(n))
   253  			code++
   254  		}
   255  		list = list[0 : len(list)-int(bits)]
   256  	}
   257  }
   258  
   259  // Update this Huffman Code object to be the minimum code for the specified frequency count.
   260  //
   261  // freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
   262  // maxBits  The maximum number of bits to use for any literal.
   263  func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
   264  	list := make([]literalNode, len(freq)+1)
   265  	// Number of non-zero literals
   266  	count := 0
   267  	// Set list to be the set of all non-zero literals and their frequencies
   268  	for i, f := range freq {
   269  		if f != 0 {
   270  			list[count] = literalNode{uint16(i), f}
   271  			count++
   272  		} else {
   273  			h.codeBits[i] = 0
   274  		}
   275  	}
   276  	// If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
   277  	h.codeBits = h.codeBits[0:len(freq)]
   278  	list = list[0:count]
   279  	if count <= 2 {
   280  		// Handle the small cases here, because they are awkward for the general case code.  With
   281  		// two or fewer literals, everything has bit length 1.
   282  		for i, node := range list {
   283  			// "list" is in order of increasing literal value.
   284  			h.codeBits[node.literal] = 1
   285  			h.code[node.literal] = uint16(i)
   286  		}
   287  		return
   288  	}
   289  	sortByFreq(list)
   290  
   291  	// Get the number of literals for each bit count
   292  	bitCount := h.bitCounts(list, maxBits)
   293  	// And do the assignment
   294  	h.assignEncodingAndSize(bitCount, list)
   295  }
   296  
   297  type literalNodeSorter struct {
   298  	a    []literalNode
   299  	less func(i, j int) bool
   300  }
   301  
   302  func (s literalNodeSorter) Len() int { return len(s.a) }
   303  
   304  func (s literalNodeSorter) Less(i, j int) bool {
   305  	return s.less(i, j)
   306  }
   307  
   308  func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
   309  
   310  func sortByFreq(a []literalNode) {
   311  	s := &literalNodeSorter{a, func(i, j int) bool {
   312  		if a[i].freq == a[j].freq {
   313  			return a[i].literal < a[j].literal
   314  		}
   315  		return a[i].freq < a[j].freq
   316  	}}
   317  	sort.Sort(s)
   318  }
   319  
   320  func sortByLiteral(a []literalNode) {
   321  	s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
   322  	sort.Sort(s)
   323  }