github.com/andybalholm/brotli@v1.0.6/huffman.go (about)

     1  package brotli
     2  
     3  /* Copyright 2013 Google Inc. All Rights Reserved.
     4  
     5     Distributed under MIT license.
     6     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     7  */
     8  
     9  /* Utilities for building Huffman decoding tables. */
    10  
    11  const huffmanMaxCodeLength = 15
    12  
    13  /* Maximum possible Huffman table size for an alphabet size of (index * 32),
    14     max code length 15 and root table bits 8. */
    15  var kMaxHuffmanTableSize = []uint16{
    16  	256,
    17  	402,
    18  	436,
    19  	468,
    20  	500,
    21  	534,
    22  	566,
    23  	598,
    24  	630,
    25  	662,
    26  	694,
    27  	726,
    28  	758,
    29  	790,
    30  	822,
    31  	854,
    32  	886,
    33  	920,
    34  	952,
    35  	984,
    36  	1016,
    37  	1048,
    38  	1080,
    39  	1112,
    40  	1144,
    41  	1176,
    42  	1208,
    43  	1240,
    44  	1272,
    45  	1304,
    46  	1336,
    47  	1368,
    48  	1400,
    49  	1432,
    50  	1464,
    51  	1496,
    52  	1528,
    53  }
    54  
    55  /* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
    56  const huffmanMaxSize26 = 396
    57  
    58  /* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
    59  const huffmanMaxSize258 = 632
    60  
    61  /* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */
    62  const huffmanMaxSize272 = 646
    63  
    64  const huffmanMaxCodeLengthCodeLength = 5
    65  
    66  /* Do not create this struct directly - use the ConstructHuffmanCode
    67   * constructor below! */
    68  type huffmanCode struct {
    69  	bits  byte
    70  	value uint16
    71  }
    72  
    73  func constructHuffmanCode(bits byte, value uint16) huffmanCode {
    74  	var h huffmanCode
    75  	h.bits = bits
    76  	h.value = value
    77  	return h
    78  }
    79  
    80  /* Builds Huffman lookup table assuming code lengths are in symbol order. */
    81  
    82  /* Builds Huffman lookup table assuming code lengths are in symbol order.
    83     Returns size of resulting table. */
    84  
    85  /* Builds a simple Huffman table. The |num_symbols| parameter is to be
    86     interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
    87     2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
    88     4 means 4 symbols with lengths [1, 2, 3, 3]. */
    89  
    90  /* Contains a collection of Huffman trees with the same alphabet size. */
    91  /* max_symbol is needed due to simple codes since log2(alphabet_size) could be
    92     greater than log2(max_symbol). */
    93  type huffmanTreeGroup struct {
    94  	htrees        [][]huffmanCode
    95  	codes         []huffmanCode
    96  	alphabet_size uint16
    97  	max_symbol    uint16
    98  	num_htrees    uint16
    99  }
   100  
   101  const reverseBitsMax = 8
   102  
   103  const reverseBitsBase = 0
   104  
   105  var kReverseBits = [1 << reverseBitsMax]byte{
   106  	0x00,
   107  	0x80,
   108  	0x40,
   109  	0xC0,
   110  	0x20,
   111  	0xA0,
   112  	0x60,
   113  	0xE0,
   114  	0x10,
   115  	0x90,
   116  	0x50,
   117  	0xD0,
   118  	0x30,
   119  	0xB0,
   120  	0x70,
   121  	0xF0,
   122  	0x08,
   123  	0x88,
   124  	0x48,
   125  	0xC8,
   126  	0x28,
   127  	0xA8,
   128  	0x68,
   129  	0xE8,
   130  	0x18,
   131  	0x98,
   132  	0x58,
   133  	0xD8,
   134  	0x38,
   135  	0xB8,
   136  	0x78,
   137  	0xF8,
   138  	0x04,
   139  	0x84,
   140  	0x44,
   141  	0xC4,
   142  	0x24,
   143  	0xA4,
   144  	0x64,
   145  	0xE4,
   146  	0x14,
   147  	0x94,
   148  	0x54,
   149  	0xD4,
   150  	0x34,
   151  	0xB4,
   152  	0x74,
   153  	0xF4,
   154  	0x0C,
   155  	0x8C,
   156  	0x4C,
   157  	0xCC,
   158  	0x2C,
   159  	0xAC,
   160  	0x6C,
   161  	0xEC,
   162  	0x1C,
   163  	0x9C,
   164  	0x5C,
   165  	0xDC,
   166  	0x3C,
   167  	0xBC,
   168  	0x7C,
   169  	0xFC,
   170  	0x02,
   171  	0x82,
   172  	0x42,
   173  	0xC2,
   174  	0x22,
   175  	0xA2,
   176  	0x62,
   177  	0xE2,
   178  	0x12,
   179  	0x92,
   180  	0x52,
   181  	0xD2,
   182  	0x32,
   183  	0xB2,
   184  	0x72,
   185  	0xF2,
   186  	0x0A,
   187  	0x8A,
   188  	0x4A,
   189  	0xCA,
   190  	0x2A,
   191  	0xAA,
   192  	0x6A,
   193  	0xEA,
   194  	0x1A,
   195  	0x9A,
   196  	0x5A,
   197  	0xDA,
   198  	0x3A,
   199  	0xBA,
   200  	0x7A,
   201  	0xFA,
   202  	0x06,
   203  	0x86,
   204  	0x46,
   205  	0xC6,
   206  	0x26,
   207  	0xA6,
   208  	0x66,
   209  	0xE6,
   210  	0x16,
   211  	0x96,
   212  	0x56,
   213  	0xD6,
   214  	0x36,
   215  	0xB6,
   216  	0x76,
   217  	0xF6,
   218  	0x0E,
   219  	0x8E,
   220  	0x4E,
   221  	0xCE,
   222  	0x2E,
   223  	0xAE,
   224  	0x6E,
   225  	0xEE,
   226  	0x1E,
   227  	0x9E,
   228  	0x5E,
   229  	0xDE,
   230  	0x3E,
   231  	0xBE,
   232  	0x7E,
   233  	0xFE,
   234  	0x01,
   235  	0x81,
   236  	0x41,
   237  	0xC1,
   238  	0x21,
   239  	0xA1,
   240  	0x61,
   241  	0xE1,
   242  	0x11,
   243  	0x91,
   244  	0x51,
   245  	0xD1,
   246  	0x31,
   247  	0xB1,
   248  	0x71,
   249  	0xF1,
   250  	0x09,
   251  	0x89,
   252  	0x49,
   253  	0xC9,
   254  	0x29,
   255  	0xA9,
   256  	0x69,
   257  	0xE9,
   258  	0x19,
   259  	0x99,
   260  	0x59,
   261  	0xD9,
   262  	0x39,
   263  	0xB9,
   264  	0x79,
   265  	0xF9,
   266  	0x05,
   267  	0x85,
   268  	0x45,
   269  	0xC5,
   270  	0x25,
   271  	0xA5,
   272  	0x65,
   273  	0xE5,
   274  	0x15,
   275  	0x95,
   276  	0x55,
   277  	0xD5,
   278  	0x35,
   279  	0xB5,
   280  	0x75,
   281  	0xF5,
   282  	0x0D,
   283  	0x8D,
   284  	0x4D,
   285  	0xCD,
   286  	0x2D,
   287  	0xAD,
   288  	0x6D,
   289  	0xED,
   290  	0x1D,
   291  	0x9D,
   292  	0x5D,
   293  	0xDD,
   294  	0x3D,
   295  	0xBD,
   296  	0x7D,
   297  	0xFD,
   298  	0x03,
   299  	0x83,
   300  	0x43,
   301  	0xC3,
   302  	0x23,
   303  	0xA3,
   304  	0x63,
   305  	0xE3,
   306  	0x13,
   307  	0x93,
   308  	0x53,
   309  	0xD3,
   310  	0x33,
   311  	0xB3,
   312  	0x73,
   313  	0xF3,
   314  	0x0B,
   315  	0x8B,
   316  	0x4B,
   317  	0xCB,
   318  	0x2B,
   319  	0xAB,
   320  	0x6B,
   321  	0xEB,
   322  	0x1B,
   323  	0x9B,
   324  	0x5B,
   325  	0xDB,
   326  	0x3B,
   327  	0xBB,
   328  	0x7B,
   329  	0xFB,
   330  	0x07,
   331  	0x87,
   332  	0x47,
   333  	0xC7,
   334  	0x27,
   335  	0xA7,
   336  	0x67,
   337  	0xE7,
   338  	0x17,
   339  	0x97,
   340  	0x57,
   341  	0xD7,
   342  	0x37,
   343  	0xB7,
   344  	0x77,
   345  	0xF7,
   346  	0x0F,
   347  	0x8F,
   348  	0x4F,
   349  	0xCF,
   350  	0x2F,
   351  	0xAF,
   352  	0x6F,
   353  	0xEF,
   354  	0x1F,
   355  	0x9F,
   356  	0x5F,
   357  	0xDF,
   358  	0x3F,
   359  	0xBF,
   360  	0x7F,
   361  	0xFF,
   362  }
   363  
   364  const reverseBitsLowest = (uint64(1) << (reverseBitsMax - 1 + reverseBitsBase))
   365  
   366  /* Returns reverse(num >> BROTLI_REVERSE_BITS_BASE, BROTLI_REVERSE_BITS_MAX),
   367     where reverse(value, len) is the bit-wise reversal of the len least
   368     significant bits of value. */
   369  func reverseBits8(num uint64) uint64 {
   370  	return uint64(kReverseBits[num])
   371  }
   372  
   373  /* Stores code in table[0], table[step], table[2*step], ..., table[end] */
   374  /* Assumes that end is an integer multiple of step */
   375  func replicateValue(table []huffmanCode, step int, end int, code huffmanCode) {
   376  	for {
   377  		end -= step
   378  		table[end] = code
   379  		if end <= 0 {
   380  			break
   381  		}
   382  	}
   383  }
   384  
   385  /* Returns the table width of the next 2nd level table. |count| is the histogram
   386     of bit lengths for the remaining symbols, |len| is the code length of the
   387     next processed symbol. */
   388  func nextTableBitSize(count []uint16, len int, root_bits int) int {
   389  	var left int = 1 << uint(len-root_bits)
   390  	for len < huffmanMaxCodeLength {
   391  		left -= int(count[len])
   392  		if left <= 0 {
   393  			break
   394  		}
   395  		len++
   396  		left <<= 1
   397  	}
   398  
   399  	return len - root_bits
   400  }
   401  
   402  func buildCodeLengthsHuffmanTable(table []huffmanCode, code_lengths []byte, count []uint16) {
   403  	var code huffmanCode /* current table entry */ /* symbol index in original or sorted table */ /* prefix code */ /* prefix code addend */ /* step size to replicate values in current table */ /* size of current table */ /* symbols sorted by code length */
   404  	var symbol int
   405  	var key uint64
   406  	var key_step uint64
   407  	var step int
   408  	var table_size int
   409  	var sorted [codeLengthCodes]int
   410  	var offset [huffmanMaxCodeLengthCodeLength + 1]int
   411  	var bits int
   412  	var bits_count int
   413  	/* offsets in sorted table for each length */
   414  	assert(huffmanMaxCodeLengthCodeLength <= reverseBitsMax)
   415  
   416  	/* Generate offsets into sorted symbol table by code length. */
   417  	symbol = -1
   418  
   419  	bits = 1
   420  	var i int
   421  	for i = 0; i < huffmanMaxCodeLengthCodeLength; i++ {
   422  		symbol += int(count[bits])
   423  		offset[bits] = symbol
   424  		bits++
   425  	}
   426  
   427  	/* Symbols with code length 0 are placed after all other symbols. */
   428  	offset[0] = codeLengthCodes - 1
   429  
   430  	/* Sort symbols by length, by symbol order within each length. */
   431  	symbol = codeLengthCodes
   432  
   433  	for {
   434  		var i int
   435  		for i = 0; i < 6; i++ {
   436  			symbol--
   437  			sorted[offset[code_lengths[symbol]]] = symbol
   438  			offset[code_lengths[symbol]]--
   439  		}
   440  		if symbol == 0 {
   441  			break
   442  		}
   443  	}
   444  
   445  	table_size = 1 << huffmanMaxCodeLengthCodeLength
   446  
   447  	/* Special case: all symbols but one have 0 code length. */
   448  	if offset[0] == 0 {
   449  		code = constructHuffmanCode(0, uint16(sorted[0]))
   450  		for key = 0; key < uint64(table_size); key++ {
   451  			table[key] = code
   452  		}
   453  
   454  		return
   455  	}
   456  
   457  	/* Fill in table. */
   458  	key = 0
   459  
   460  	key_step = reverseBitsLowest
   461  	symbol = 0
   462  	bits = 1
   463  	step = 2
   464  	for {
   465  		for bits_count = int(count[bits]); bits_count != 0; bits_count-- {
   466  			code = constructHuffmanCode(byte(bits), uint16(sorted[symbol]))
   467  			symbol++
   468  			replicateValue(table[reverseBits8(key):], step, table_size, code)
   469  			key += key_step
   470  		}
   471  
   472  		step <<= 1
   473  		key_step >>= 1
   474  		bits++
   475  		if bits > huffmanMaxCodeLengthCodeLength {
   476  			break
   477  		}
   478  	}
   479  }
   480  
   481  func buildHuffmanTable(root_table []huffmanCode, root_bits int, symbol_lists symbolList, count []uint16) uint32 {
   482  	var code huffmanCode /* current table entry */ /* next available space in table */ /* current code length */ /* symbol index in original or sorted table */ /* prefix code */ /* prefix code addend */ /* 2nd level table prefix code */ /* 2nd level table prefix code addend */ /* step size to replicate values in current table */ /* key length of current table */ /* size of current table */ /* sum of root table size and 2nd level table sizes */
   483  	var table []huffmanCode
   484  	var len int
   485  	var symbol int
   486  	var key uint64
   487  	var key_step uint64
   488  	var sub_key uint64
   489  	var sub_key_step uint64
   490  	var step int
   491  	var table_bits int
   492  	var table_size int
   493  	var total_size int
   494  	var max_length int = -1
   495  	var bits int
   496  	var bits_count int
   497  
   498  	assert(root_bits <= reverseBitsMax)
   499  	assert(huffmanMaxCodeLength-root_bits <= reverseBitsMax)
   500  
   501  	for symbolListGet(symbol_lists, max_length) == 0xFFFF {
   502  		max_length--
   503  	}
   504  	max_length += huffmanMaxCodeLength + 1
   505  
   506  	table = root_table
   507  	table_bits = root_bits
   508  	table_size = 1 << uint(table_bits)
   509  	total_size = table_size
   510  
   511  	/* Fill in the root table. Reduce the table size to if possible,
   512  	   and create the repetitions by memcpy. */
   513  	if table_bits > max_length {
   514  		table_bits = max_length
   515  		table_size = 1 << uint(table_bits)
   516  	}
   517  
   518  	key = 0
   519  	key_step = reverseBitsLowest
   520  	bits = 1
   521  	step = 2
   522  	for {
   523  		symbol = bits - (huffmanMaxCodeLength + 1)
   524  		for bits_count = int(count[bits]); bits_count != 0; bits_count-- {
   525  			symbol = int(symbolListGet(symbol_lists, symbol))
   526  			code = constructHuffmanCode(byte(bits), uint16(symbol))
   527  			replicateValue(table[reverseBits8(key):], step, table_size, code)
   528  			key += key_step
   529  		}
   530  
   531  		step <<= 1
   532  		key_step >>= 1
   533  		bits++
   534  		if bits > table_bits {
   535  			break
   536  		}
   537  	}
   538  
   539  	/* If root_bits != table_bits then replicate to fill the remaining slots. */
   540  	for total_size != table_size {
   541  		copy(table[table_size:], table[:uint(table_size)])
   542  		table_size <<= 1
   543  	}
   544  
   545  	/* Fill in 2nd level tables and add pointers to root table. */
   546  	key_step = reverseBitsLowest >> uint(root_bits-1)
   547  
   548  	sub_key = reverseBitsLowest << 1
   549  	sub_key_step = reverseBitsLowest
   550  	len = root_bits + 1
   551  	step = 2
   552  	for ; len <= max_length; len++ {
   553  		symbol = len - (huffmanMaxCodeLength + 1)
   554  		for ; count[len] != 0; count[len]-- {
   555  			if sub_key == reverseBitsLowest<<1 {
   556  				table = table[table_size:]
   557  				table_bits = nextTableBitSize(count, int(len), root_bits)
   558  				table_size = 1 << uint(table_bits)
   559  				total_size += table_size
   560  				sub_key = reverseBits8(key)
   561  				key += key_step
   562  				root_table[sub_key] = constructHuffmanCode(byte(table_bits+root_bits), uint16(uint64(uint(-cap(table)+cap(root_table)))-sub_key))
   563  				sub_key = 0
   564  			}
   565  
   566  			symbol = int(symbolListGet(symbol_lists, symbol))
   567  			code = constructHuffmanCode(byte(len-root_bits), uint16(symbol))
   568  			replicateValue(table[reverseBits8(sub_key):], step, table_size, code)
   569  			sub_key += sub_key_step
   570  		}
   571  
   572  		step <<= 1
   573  		sub_key_step >>= 1
   574  	}
   575  
   576  	return uint32(total_size)
   577  }
   578  
   579  func buildSimpleHuffmanTable(table []huffmanCode, root_bits int, val []uint16, num_symbols uint32) uint32 {
   580  	var table_size uint32 = 1
   581  	var goal_size uint32 = 1 << uint(root_bits)
   582  	switch num_symbols {
   583  	case 0:
   584  		table[0] = constructHuffmanCode(0, val[0])
   585  
   586  	case 1:
   587  		if val[1] > val[0] {
   588  			table[0] = constructHuffmanCode(1, val[0])
   589  			table[1] = constructHuffmanCode(1, val[1])
   590  		} else {
   591  			table[0] = constructHuffmanCode(1, val[1])
   592  			table[1] = constructHuffmanCode(1, val[0])
   593  		}
   594  
   595  		table_size = 2
   596  
   597  	case 2:
   598  		table[0] = constructHuffmanCode(1, val[0])
   599  		table[2] = constructHuffmanCode(1, val[0])
   600  		if val[2] > val[1] {
   601  			table[1] = constructHuffmanCode(2, val[1])
   602  			table[3] = constructHuffmanCode(2, val[2])
   603  		} else {
   604  			table[1] = constructHuffmanCode(2, val[2])
   605  			table[3] = constructHuffmanCode(2, val[1])
   606  		}
   607  
   608  		table_size = 4
   609  
   610  	case 3:
   611  		var i int
   612  		var k int
   613  		for i = 0; i < 3; i++ {
   614  			for k = i + 1; k < 4; k++ {
   615  				if val[k] < val[i] {
   616  					var t uint16 = val[k]
   617  					val[k] = val[i]
   618  					val[i] = t
   619  				}
   620  			}
   621  		}
   622  
   623  		table[0] = constructHuffmanCode(2, val[0])
   624  		table[2] = constructHuffmanCode(2, val[1])
   625  		table[1] = constructHuffmanCode(2, val[2])
   626  		table[3] = constructHuffmanCode(2, val[3])
   627  		table_size = 4
   628  
   629  	case 4:
   630  		if val[3] < val[2] {
   631  			var t uint16 = val[3]
   632  			val[3] = val[2]
   633  			val[2] = t
   634  		}
   635  
   636  		table[0] = constructHuffmanCode(1, val[0])
   637  		table[1] = constructHuffmanCode(2, val[1])
   638  		table[2] = constructHuffmanCode(1, val[0])
   639  		table[3] = constructHuffmanCode(3, val[2])
   640  		table[4] = constructHuffmanCode(1, val[0])
   641  		table[5] = constructHuffmanCode(2, val[1])
   642  		table[6] = constructHuffmanCode(1, val[0])
   643  		table[7] = constructHuffmanCode(3, val[3])
   644  		table_size = 8
   645  	}
   646  
   647  	for table_size != goal_size {
   648  		copy(table[table_size:], table[:uint(table_size)])
   649  		table_size <<= 1
   650  	}
   651  
   652  	return goal_size
   653  }