github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/compress/bzip2/huffman.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bzip2 6 7 import "sort" 8 9 // A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a 10 // symbol. 11 type huffmanTree struct { 12 // nodes contains all the non-leaf nodes in the tree. nodes[0] is the 13 // root of the tree and nextNode contains the index of the next element 14 // of nodes to use when the tree is being constructed. 15 nodes []huffmanNode 16 nextNode int 17 } 18 19 // A huffmanNode is a node in the tree. left and right contain indexes into the 20 // nodes slice of the tree. If left or right is invalidNodeValue then the child 21 // is a left node and its value is in leftValue/rightValue. 22 // 23 // The symbols are uint16s because bzip2 encodes not only MTF indexes in the 24 // tree, but also two magic values for run-length encoding and an EOF symbol. 25 // Thus there are more than 256 possible symbols. 26 type huffmanNode struct { 27 left, right uint16 28 leftValue, rightValue uint16 29 } 30 31 // invalidNodeValue is an invalid index which marks a leaf node in the tree. 32 const invalidNodeValue = 0xffff 33 34 // Decode reads bits from the given bitReader and navigates the tree until a 35 // symbol is found. 36 func (t huffmanTree) Decode(br *bitReader) (v uint16) { 37 nodeIndex := uint16(0) // node 0 is the root of the tree. 38 39 for { 40 node := &t.nodes[nodeIndex] 41 bit := br.ReadBit() 42 // bzip2 encodes left as a true bit. 43 if bit { 44 // left 45 if node.left == invalidNodeValue { 46 return node.leftValue 47 } 48 nodeIndex = node.left 49 } else { 50 // right 51 if node.right == invalidNodeValue { 52 return node.rightValue 53 } 54 nodeIndex = node.right 55 } 56 } 57 } 58 59 // newHuffmanTree builds a Huffman tree from a slice containing the code 60 // lengths of each symbol. The maximum code length is 32 bits. 61 func newHuffmanTree(lengths []uint8) (huffmanTree, error) { 62 // There are many possible trees that assign the same code length to 63 // each symbol (consider reflecting a tree down the middle, for 64 // example). Since the code length assignments determine the 65 // efficiency of the tree, each of these trees is equally good. In 66 // order to minimize the amount of information needed to build a tree 67 // bzip2 uses a canonical tree so that it can be reconstructed given 68 // only the code length assignments. 69 70 if len(lengths) < 2 { 71 panic("newHuffmanTree: too few symbols") 72 } 73 74 var t huffmanTree 75 76 // First we sort the code length assignments by ascending code length, 77 // using the symbol value to break ties. 78 pairs := huffmanSymbolLengthPairs(make([]huffmanSymbolLengthPair, len(lengths))) 79 for i, length := range lengths { 80 pairs[i].value = uint16(i) 81 pairs[i].length = length 82 } 83 84 sort.Sort(pairs) 85 86 // Now we assign codes to the symbols, starting with the longest code. 87 // We keep the codes packed into a uint32, at the most-significant end. 88 // So branches are taken from the MSB downwards. This makes it easy to 89 // sort them later. 90 code := uint32(0) 91 length := uint8(32) 92 93 codes := huffmanCodes(make([]huffmanCode, len(lengths))) 94 for i := len(pairs) - 1; i >= 0; i-- { 95 if length > pairs[i].length { 96 // If the code length decreases we shift in order to 97 // zero any bits beyond the end of the code. 98 length >>= 32 - pairs[i].length 99 length <<= 32 - pairs[i].length 100 length = pairs[i].length 101 } 102 codes[i].code = code 103 codes[i].codeLen = length 104 codes[i].value = pairs[i].value 105 // We need to 'increment' the code, which means treating |code| 106 // like a |length| bit number. 107 code += 1 << (32 - length) 108 } 109 110 // Now we can sort by the code so that the left half of each branch are 111 // grouped together, recursively. 112 sort.Sort(codes) 113 114 t.nodes = make([]huffmanNode, len(codes)) 115 _, err := buildHuffmanNode(&t, codes, 0) 116 return t, err 117 } 118 119 // huffmanSymbolLengthPair contains a symbol and its code length. 120 type huffmanSymbolLengthPair struct { 121 value uint16 122 length uint8 123 } 124 125 // huffmanSymbolLengthPair is used to provide an interface for sorting. 126 type huffmanSymbolLengthPairs []huffmanSymbolLengthPair 127 128 func (h huffmanSymbolLengthPairs) Len() int { 129 return len(h) 130 } 131 132 func (h huffmanSymbolLengthPairs) Less(i, j int) bool { 133 if h[i].length < h[j].length { 134 return true 135 } 136 if h[i].length > h[j].length { 137 return false 138 } 139 if h[i].value < h[j].value { 140 return true 141 } 142 return false 143 } 144 145 func (h huffmanSymbolLengthPairs) Swap(i, j int) { 146 h[i], h[j] = h[j], h[i] 147 } 148 149 // huffmanCode contains a symbol, its code and code length. 150 type huffmanCode struct { 151 code uint32 152 codeLen uint8 153 value uint16 154 } 155 156 // huffmanCodes is used to provide an interface for sorting. 157 type huffmanCodes []huffmanCode 158 159 func (n huffmanCodes) Len() int { 160 return len(n) 161 } 162 163 func (n huffmanCodes) Less(i, j int) bool { 164 return n[i].code < n[j].code 165 } 166 167 func (n huffmanCodes) Swap(i, j int) { 168 n[i], n[j] = n[j], n[i] 169 } 170 171 // buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in 172 // the Huffman tree at the given level. It returns the index of the newly 173 // constructed node. 174 func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err error) { 175 test := uint32(1) << (31 - level) 176 177 // We have to search the list of codes to find the divide between the left and right sides. 178 firstRightIndex := len(codes) 179 for i, code := range codes { 180 if code.code&test != 0 { 181 firstRightIndex = i 182 break 183 } 184 } 185 186 left := codes[:firstRightIndex] 187 right := codes[firstRightIndex:] 188 189 if len(left) == 0 || len(right) == 0 { 190 return 0, StructuralError("superfluous level in Huffman tree") 191 } 192 193 nodeIndex = uint16(t.nextNode) 194 node := &t.nodes[t.nextNode] 195 t.nextNode++ 196 197 if len(left) == 1 { 198 // leaf node 199 node.left = invalidNodeValue 200 node.leftValue = left[0].value 201 } else { 202 node.left, err = buildHuffmanNode(t, left, level+1) 203 } 204 205 if err != nil { 206 return 207 } 208 209 if len(right) == 1 { 210 // leaf node 211 node.right = invalidNodeValue 212 node.rightValue = right[0].value 213 } else { 214 node.right, err = buildHuffmanNode(t, right, level+1) 215 } 216 217 return 218 }