github.com/andybalholm/brotli@v1.0.6/bit_cost.go (about)

     1  package brotli
     2  
     3  /* Copyright 2013 Google Inc. All Rights Reserved.
     4  
     5     Distributed under MIT license.
     6     See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
     7  */
     8  
     9  /* Functions to estimate the bit cost of Huffman trees. */
    10  func shannonEntropy(population []uint32, size uint, total *uint) float64 {
    11  	var sum uint = 0
    12  	var retval float64 = 0
    13  	var population_end []uint32 = population[size:]
    14  	var p uint
    15  	for -cap(population) < -cap(population_end) {
    16  		p = uint(population[0])
    17  		population = population[1:]
    18  		sum += p
    19  		retval -= float64(p) * fastLog2(p)
    20  	}
    21  
    22  	if sum != 0 {
    23  		retval += float64(sum) * fastLog2(sum)
    24  	}
    25  	*total = sum
    26  	return retval
    27  }
    28  
    29  func bitsEntropy(population []uint32, size uint) float64 {
    30  	var sum uint
    31  	var retval float64 = shannonEntropy(population, size, &sum)
    32  	if retval < float64(sum) {
    33  		/* At least one bit per literal is needed. */
    34  		retval = float64(sum)
    35  	}
    36  
    37  	return retval
    38  }
    39  
    40  const kOneSymbolHistogramCost float64 = 12
    41  const kTwoSymbolHistogramCost float64 = 20
    42  const kThreeSymbolHistogramCost float64 = 28
    43  const kFourSymbolHistogramCost float64 = 37
    44  
    45  func populationCostLiteral(histogram *histogramLiteral) float64 {
    46  	var data_size uint = histogramDataSizeLiteral()
    47  	var count int = 0
    48  	var s [5]uint
    49  	var bits float64 = 0.0
    50  	var i uint
    51  	if histogram.total_count_ == 0 {
    52  		return kOneSymbolHistogramCost
    53  	}
    54  
    55  	for i = 0; i < data_size; i++ {
    56  		if histogram.data_[i] > 0 {
    57  			s[count] = i
    58  			count++
    59  			if count > 4 {
    60  				break
    61  			}
    62  		}
    63  	}
    64  
    65  	if count == 1 {
    66  		return kOneSymbolHistogramCost
    67  	}
    68  
    69  	if count == 2 {
    70  		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
    71  	}
    72  
    73  	if count == 3 {
    74  		var histo0 uint32 = histogram.data_[s[0]]
    75  		var histo1 uint32 = histogram.data_[s[1]]
    76  		var histo2 uint32 = histogram.data_[s[2]]
    77  		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
    78  		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
    79  	}
    80  
    81  	if count == 4 {
    82  		var histo [4]uint32
    83  		var h23 uint32
    84  		var histomax uint32
    85  		for i = 0; i < 4; i++ {
    86  			histo[i] = histogram.data_[s[i]]
    87  		}
    88  
    89  		/* Sort */
    90  		for i = 0; i < 4; i++ {
    91  			var j uint
    92  			for j = i + 1; j < 4; j++ {
    93  				if histo[j] > histo[i] {
    94  					var tmp uint32 = histo[j]
    95  					histo[j] = histo[i]
    96  					histo[i] = tmp
    97  				}
    98  			}
    99  		}
   100  
   101  		h23 = histo[2] + histo[3]
   102  		histomax = brotli_max_uint32_t(h23, histo[0])
   103  		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
   104  	}
   105  	{
   106  		var max_depth uint = 1
   107  		var depth_histo = [codeLengthCodes]uint32{0}
   108  		/* In this loop we compute the entropy of the histogram and simultaneously
   109  		   build a simplified histogram of the code length codes where we use the
   110  		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
   111  
   112  		var log2total float64 = fastLog2(histogram.total_count_)
   113  		for i = 0; i < data_size; {
   114  			if histogram.data_[i] > 0 {
   115  				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
   116  				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
   117  				   = log2(total_count) - log2(count(symbol)) */
   118  
   119  				var depth uint = uint(log2p + 0.5)
   120  				/* Approximate the bit depth by round(-log2(P(symbol))) */
   121  				bits += float64(histogram.data_[i]) * log2p
   122  
   123  				if depth > 15 {
   124  					depth = 15
   125  				}
   126  
   127  				if depth > max_depth {
   128  					max_depth = depth
   129  				}
   130  
   131  				depth_histo[depth]++
   132  				i++
   133  			} else {
   134  				var reps uint32 = 1
   135  				/* Compute the run length of zeros and add the appropriate number of 0
   136  				   and 17 code length codes to the code length code histogram. */
   137  
   138  				var k uint
   139  				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
   140  					reps++
   141  				}
   142  
   143  				i += uint(reps)
   144  				if i == data_size {
   145  					/* Don't add any cost for the last zero run, since these are encoded
   146  					   only implicitly. */
   147  					break
   148  				}
   149  
   150  				if reps < 3 {
   151  					depth_histo[0] += reps
   152  				} else {
   153  					reps -= 2
   154  					for reps > 0 {
   155  						depth_histo[repeatZeroCodeLength]++
   156  
   157  						/* Add the 3 extra bits for the 17 code length code. */
   158  						bits += 3
   159  
   160  						reps >>= 3
   161  					}
   162  				}
   163  			}
   164  		}
   165  
   166  		/* Add the estimated encoding cost of the code length code histogram. */
   167  		bits += float64(18 + 2*max_depth)
   168  
   169  		/* Add the entropy of the code length code histogram. */
   170  		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
   171  	}
   172  
   173  	return bits
   174  }
   175  
   176  func populationCostCommand(histogram *histogramCommand) float64 {
   177  	var data_size uint = histogramDataSizeCommand()
   178  	var count int = 0
   179  	var s [5]uint
   180  	var bits float64 = 0.0
   181  	var i uint
   182  	if histogram.total_count_ == 0 {
   183  		return kOneSymbolHistogramCost
   184  	}
   185  
   186  	for i = 0; i < data_size; i++ {
   187  		if histogram.data_[i] > 0 {
   188  			s[count] = i
   189  			count++
   190  			if count > 4 {
   191  				break
   192  			}
   193  		}
   194  	}
   195  
   196  	if count == 1 {
   197  		return kOneSymbolHistogramCost
   198  	}
   199  
   200  	if count == 2 {
   201  		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
   202  	}
   203  
   204  	if count == 3 {
   205  		var histo0 uint32 = histogram.data_[s[0]]
   206  		var histo1 uint32 = histogram.data_[s[1]]
   207  		var histo2 uint32 = histogram.data_[s[2]]
   208  		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
   209  		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
   210  	}
   211  
   212  	if count == 4 {
   213  		var histo [4]uint32
   214  		var h23 uint32
   215  		var histomax uint32
   216  		for i = 0; i < 4; i++ {
   217  			histo[i] = histogram.data_[s[i]]
   218  		}
   219  
   220  		/* Sort */
   221  		for i = 0; i < 4; i++ {
   222  			var j uint
   223  			for j = i + 1; j < 4; j++ {
   224  				if histo[j] > histo[i] {
   225  					var tmp uint32 = histo[j]
   226  					histo[j] = histo[i]
   227  					histo[i] = tmp
   228  				}
   229  			}
   230  		}
   231  
   232  		h23 = histo[2] + histo[3]
   233  		histomax = brotli_max_uint32_t(h23, histo[0])
   234  		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
   235  	}
   236  	{
   237  		var max_depth uint = 1
   238  		var depth_histo = [codeLengthCodes]uint32{0}
   239  		/* In this loop we compute the entropy of the histogram and simultaneously
   240  		   build a simplified histogram of the code length codes where we use the
   241  		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
   242  
   243  		var log2total float64 = fastLog2(histogram.total_count_)
   244  		for i = 0; i < data_size; {
   245  			if histogram.data_[i] > 0 {
   246  				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
   247  				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
   248  				   = log2(total_count) - log2(count(symbol)) */
   249  
   250  				var depth uint = uint(log2p + 0.5)
   251  				/* Approximate the bit depth by round(-log2(P(symbol))) */
   252  				bits += float64(histogram.data_[i]) * log2p
   253  
   254  				if depth > 15 {
   255  					depth = 15
   256  				}
   257  
   258  				if depth > max_depth {
   259  					max_depth = depth
   260  				}
   261  
   262  				depth_histo[depth]++
   263  				i++
   264  			} else {
   265  				var reps uint32 = 1
   266  				/* Compute the run length of zeros and add the appropriate number of 0
   267  				   and 17 code length codes to the code length code histogram. */
   268  
   269  				var k uint
   270  				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
   271  					reps++
   272  				}
   273  
   274  				i += uint(reps)
   275  				if i == data_size {
   276  					/* Don't add any cost for the last zero run, since these are encoded
   277  					   only implicitly. */
   278  					break
   279  				}
   280  
   281  				if reps < 3 {
   282  					depth_histo[0] += reps
   283  				} else {
   284  					reps -= 2
   285  					for reps > 0 {
   286  						depth_histo[repeatZeroCodeLength]++
   287  
   288  						/* Add the 3 extra bits for the 17 code length code. */
   289  						bits += 3
   290  
   291  						reps >>= 3
   292  					}
   293  				}
   294  			}
   295  		}
   296  
   297  		/* Add the estimated encoding cost of the code length code histogram. */
   298  		bits += float64(18 + 2*max_depth)
   299  
   300  		/* Add the entropy of the code length code histogram. */
   301  		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
   302  	}
   303  
   304  	return bits
   305  }
   306  
   307  func populationCostDistance(histogram *histogramDistance) float64 {
   308  	var data_size uint = histogramDataSizeDistance()
   309  	var count int = 0
   310  	var s [5]uint
   311  	var bits float64 = 0.0
   312  	var i uint
   313  	if histogram.total_count_ == 0 {
   314  		return kOneSymbolHistogramCost
   315  	}
   316  
   317  	for i = 0; i < data_size; i++ {
   318  		if histogram.data_[i] > 0 {
   319  			s[count] = i
   320  			count++
   321  			if count > 4 {
   322  				break
   323  			}
   324  		}
   325  	}
   326  
   327  	if count == 1 {
   328  		return kOneSymbolHistogramCost
   329  	}
   330  
   331  	if count == 2 {
   332  		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
   333  	}
   334  
   335  	if count == 3 {
   336  		var histo0 uint32 = histogram.data_[s[0]]
   337  		var histo1 uint32 = histogram.data_[s[1]]
   338  		var histo2 uint32 = histogram.data_[s[2]]
   339  		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
   340  		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
   341  	}
   342  
   343  	if count == 4 {
   344  		var histo [4]uint32
   345  		var h23 uint32
   346  		var histomax uint32
   347  		for i = 0; i < 4; i++ {
   348  			histo[i] = histogram.data_[s[i]]
   349  		}
   350  
   351  		/* Sort */
   352  		for i = 0; i < 4; i++ {
   353  			var j uint
   354  			for j = i + 1; j < 4; j++ {
   355  				if histo[j] > histo[i] {
   356  					var tmp uint32 = histo[j]
   357  					histo[j] = histo[i]
   358  					histo[i] = tmp
   359  				}
   360  			}
   361  		}
   362  
   363  		h23 = histo[2] + histo[3]
   364  		histomax = brotli_max_uint32_t(h23, histo[0])
   365  		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
   366  	}
   367  	{
   368  		var max_depth uint = 1
   369  		var depth_histo = [codeLengthCodes]uint32{0}
   370  		/* In this loop we compute the entropy of the histogram and simultaneously
   371  		   build a simplified histogram of the code length codes where we use the
   372  		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
   373  
   374  		var log2total float64 = fastLog2(histogram.total_count_)
   375  		for i = 0; i < data_size; {
   376  			if histogram.data_[i] > 0 {
   377  				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
   378  				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
   379  				   = log2(total_count) - log2(count(symbol)) */
   380  
   381  				var depth uint = uint(log2p + 0.5)
   382  				/* Approximate the bit depth by round(-log2(P(symbol))) */
   383  				bits += float64(histogram.data_[i]) * log2p
   384  
   385  				if depth > 15 {
   386  					depth = 15
   387  				}
   388  
   389  				if depth > max_depth {
   390  					max_depth = depth
   391  				}
   392  
   393  				depth_histo[depth]++
   394  				i++
   395  			} else {
   396  				var reps uint32 = 1
   397  				/* Compute the run length of zeros and add the appropriate number of 0
   398  				   and 17 code length codes to the code length code histogram. */
   399  
   400  				var k uint
   401  				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
   402  					reps++
   403  				}
   404  
   405  				i += uint(reps)
   406  				if i == data_size {
   407  					/* Don't add any cost for the last zero run, since these are encoded
   408  					   only implicitly. */
   409  					break
   410  				}
   411  
   412  				if reps < 3 {
   413  					depth_histo[0] += reps
   414  				} else {
   415  					reps -= 2
   416  					for reps > 0 {
   417  						depth_histo[repeatZeroCodeLength]++
   418  
   419  						/* Add the 3 extra bits for the 17 code length code. */
   420  						bits += 3
   421  
   422  						reps >>= 3
   423  					}
   424  				}
   425  			}
   426  		}
   427  
   428  		/* Add the estimated encoding cost of the code length code histogram. */
   429  		bits += float64(18 + 2*max_depth)
   430  
   431  		/* Add the entropy of the code length code histogram. */
   432  		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
   433  	}
   434  
   435  	return bits
   436  }