github.com/andybalholm/brotli@v1.0.6/bit_cost.go (about) 1 package brotli 2 3 /* Copyright 2013 Google Inc. All Rights Reserved. 4 5 Distributed under MIT license. 6 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 7 */ 8 9 /* Functions to estimate the bit cost of Huffman trees. */ 10 func shannonEntropy(population []uint32, size uint, total *uint) float64 { 11 var sum uint = 0 12 var retval float64 = 0 13 var population_end []uint32 = population[size:] 14 var p uint 15 for -cap(population) < -cap(population_end) { 16 p = uint(population[0]) 17 population = population[1:] 18 sum += p 19 retval -= float64(p) * fastLog2(p) 20 } 21 22 if sum != 0 { 23 retval += float64(sum) * fastLog2(sum) 24 } 25 *total = sum 26 return retval 27 } 28 29 func bitsEntropy(population []uint32, size uint) float64 { 30 var sum uint 31 var retval float64 = shannonEntropy(population, size, &sum) 32 if retval < float64(sum) { 33 /* At least one bit per literal is needed. */ 34 retval = float64(sum) 35 } 36 37 return retval 38 } 39 40 const kOneSymbolHistogramCost float64 = 12 41 const kTwoSymbolHistogramCost float64 = 20 42 const kThreeSymbolHistogramCost float64 = 28 43 const kFourSymbolHistogramCost float64 = 37 44 45 func populationCostLiteral(histogram *histogramLiteral) float64 { 46 var data_size uint = histogramDataSizeLiteral() 47 var count int = 0 48 var s [5]uint 49 var bits float64 = 0.0 50 var i uint 51 if histogram.total_count_ == 0 { 52 return kOneSymbolHistogramCost 53 } 54 55 for i = 0; i < data_size; i++ { 56 if histogram.data_[i] > 0 { 57 s[count] = i 58 count++ 59 if count > 4 { 60 break 61 } 62 } 63 } 64 65 if count == 1 { 66 return kOneSymbolHistogramCost 67 } 68 69 if count == 2 { 70 return kTwoSymbolHistogramCost + float64(histogram.total_count_) 71 } 72 73 if count == 3 { 74 var histo0 uint32 = histogram.data_[s[0]] 75 var histo1 uint32 = histogram.data_[s[1]] 76 var histo2 uint32 = histogram.data_[s[2]] 77 var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) 78 return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) 79 } 80 81 if count == 4 { 82 var histo [4]uint32 83 var h23 uint32 84 var histomax uint32 85 for i = 0; i < 4; i++ { 86 histo[i] = histogram.data_[s[i]] 87 } 88 89 /* Sort */ 90 for i = 0; i < 4; i++ { 91 var j uint 92 for j = i + 1; j < 4; j++ { 93 if histo[j] > histo[i] { 94 var tmp uint32 = histo[j] 95 histo[j] = histo[i] 96 histo[i] = tmp 97 } 98 } 99 } 100 101 h23 = histo[2] + histo[3] 102 histomax = brotli_max_uint32_t(h23, histo[0]) 103 return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) 104 } 105 { 106 var max_depth uint = 1 107 var depth_histo = [codeLengthCodes]uint32{0} 108 /* In this loop we compute the entropy of the histogram and simultaneously 109 build a simplified histogram of the code length codes where we use the 110 zero repeat code 17, but we don't use the non-zero repeat code 16. */ 111 112 var log2total float64 = fastLog2(histogram.total_count_) 113 for i = 0; i < data_size; { 114 if histogram.data_[i] > 0 { 115 var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) 116 /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = 117 = log2(total_count) - log2(count(symbol)) */ 118 119 var depth uint = uint(log2p + 0.5) 120 /* Approximate the bit depth by round(-log2(P(symbol))) */ 121 bits += float64(histogram.data_[i]) * log2p 122 123 if depth > 15 { 124 depth = 15 125 } 126 127 if depth > max_depth { 128 max_depth = depth 129 } 130 131 depth_histo[depth]++ 132 i++ 133 } else { 134 var reps uint32 = 1 135 /* Compute the run length of zeros and add the appropriate number of 0 136 and 17 code length codes to the code length code histogram. */ 137 138 var k uint 139 for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { 140 reps++ 141 } 142 143 i += uint(reps) 144 if i == data_size { 145 /* Don't add any cost for the last zero run, since these are encoded 146 only implicitly. */ 147 break 148 } 149 150 if reps < 3 { 151 depth_histo[0] += reps 152 } else { 153 reps -= 2 154 for reps > 0 { 155 depth_histo[repeatZeroCodeLength]++ 156 157 /* Add the 3 extra bits for the 17 code length code. */ 158 bits += 3 159 160 reps >>= 3 161 } 162 } 163 } 164 } 165 166 /* Add the estimated encoding cost of the code length code histogram. */ 167 bits += float64(18 + 2*max_depth) 168 169 /* Add the entropy of the code length code histogram. */ 170 bits += bitsEntropy(depth_histo[:], codeLengthCodes) 171 } 172 173 return bits 174 } 175 176 func populationCostCommand(histogram *histogramCommand) float64 { 177 var data_size uint = histogramDataSizeCommand() 178 var count int = 0 179 var s [5]uint 180 var bits float64 = 0.0 181 var i uint 182 if histogram.total_count_ == 0 { 183 return kOneSymbolHistogramCost 184 } 185 186 for i = 0; i < data_size; i++ { 187 if histogram.data_[i] > 0 { 188 s[count] = i 189 count++ 190 if count > 4 { 191 break 192 } 193 } 194 } 195 196 if count == 1 { 197 return kOneSymbolHistogramCost 198 } 199 200 if count == 2 { 201 return kTwoSymbolHistogramCost + float64(histogram.total_count_) 202 } 203 204 if count == 3 { 205 var histo0 uint32 = histogram.data_[s[0]] 206 var histo1 uint32 = histogram.data_[s[1]] 207 var histo2 uint32 = histogram.data_[s[2]] 208 var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) 209 return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) 210 } 211 212 if count == 4 { 213 var histo [4]uint32 214 var h23 uint32 215 var histomax uint32 216 for i = 0; i < 4; i++ { 217 histo[i] = histogram.data_[s[i]] 218 } 219 220 /* Sort */ 221 for i = 0; i < 4; i++ { 222 var j uint 223 for j = i + 1; j < 4; j++ { 224 if histo[j] > histo[i] { 225 var tmp uint32 = histo[j] 226 histo[j] = histo[i] 227 histo[i] = tmp 228 } 229 } 230 } 231 232 h23 = histo[2] + histo[3] 233 histomax = brotli_max_uint32_t(h23, histo[0]) 234 return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) 235 } 236 { 237 var max_depth uint = 1 238 var depth_histo = [codeLengthCodes]uint32{0} 239 /* In this loop we compute the entropy of the histogram and simultaneously 240 build a simplified histogram of the code length codes where we use the 241 zero repeat code 17, but we don't use the non-zero repeat code 16. */ 242 243 var log2total float64 = fastLog2(histogram.total_count_) 244 for i = 0; i < data_size; { 245 if histogram.data_[i] > 0 { 246 var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) 247 /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = 248 = log2(total_count) - log2(count(symbol)) */ 249 250 var depth uint = uint(log2p + 0.5) 251 /* Approximate the bit depth by round(-log2(P(symbol))) */ 252 bits += float64(histogram.data_[i]) * log2p 253 254 if depth > 15 { 255 depth = 15 256 } 257 258 if depth > max_depth { 259 max_depth = depth 260 } 261 262 depth_histo[depth]++ 263 i++ 264 } else { 265 var reps uint32 = 1 266 /* Compute the run length of zeros and add the appropriate number of 0 267 and 17 code length codes to the code length code histogram. */ 268 269 var k uint 270 for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { 271 reps++ 272 } 273 274 i += uint(reps) 275 if i == data_size { 276 /* Don't add any cost for the last zero run, since these are encoded 277 only implicitly. */ 278 break 279 } 280 281 if reps < 3 { 282 depth_histo[0] += reps 283 } else { 284 reps -= 2 285 for reps > 0 { 286 depth_histo[repeatZeroCodeLength]++ 287 288 /* Add the 3 extra bits for the 17 code length code. */ 289 bits += 3 290 291 reps >>= 3 292 } 293 } 294 } 295 } 296 297 /* Add the estimated encoding cost of the code length code histogram. */ 298 bits += float64(18 + 2*max_depth) 299 300 /* Add the entropy of the code length code histogram. */ 301 bits += bitsEntropy(depth_histo[:], codeLengthCodes) 302 } 303 304 return bits 305 } 306 307 func populationCostDistance(histogram *histogramDistance) float64 { 308 var data_size uint = histogramDataSizeDistance() 309 var count int = 0 310 var s [5]uint 311 var bits float64 = 0.0 312 var i uint 313 if histogram.total_count_ == 0 { 314 return kOneSymbolHistogramCost 315 } 316 317 for i = 0; i < data_size; i++ { 318 if histogram.data_[i] > 0 { 319 s[count] = i 320 count++ 321 if count > 4 { 322 break 323 } 324 } 325 } 326 327 if count == 1 { 328 return kOneSymbolHistogramCost 329 } 330 331 if count == 2 { 332 return kTwoSymbolHistogramCost + float64(histogram.total_count_) 333 } 334 335 if count == 3 { 336 var histo0 uint32 = histogram.data_[s[0]] 337 var histo1 uint32 = histogram.data_[s[1]] 338 var histo2 uint32 = histogram.data_[s[2]] 339 var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) 340 return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) 341 } 342 343 if count == 4 { 344 var histo [4]uint32 345 var h23 uint32 346 var histomax uint32 347 for i = 0; i < 4; i++ { 348 histo[i] = histogram.data_[s[i]] 349 } 350 351 /* Sort */ 352 for i = 0; i < 4; i++ { 353 var j uint 354 for j = i + 1; j < 4; j++ { 355 if histo[j] > histo[i] { 356 var tmp uint32 = histo[j] 357 histo[j] = histo[i] 358 histo[i] = tmp 359 } 360 } 361 } 362 363 h23 = histo[2] + histo[3] 364 histomax = brotli_max_uint32_t(h23, histo[0]) 365 return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) 366 } 367 { 368 var max_depth uint = 1 369 var depth_histo = [codeLengthCodes]uint32{0} 370 /* In this loop we compute the entropy of the histogram and simultaneously 371 build a simplified histogram of the code length codes where we use the 372 zero repeat code 17, but we don't use the non-zero repeat code 16. */ 373 374 var log2total float64 = fastLog2(histogram.total_count_) 375 for i = 0; i < data_size; { 376 if histogram.data_[i] > 0 { 377 var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) 378 /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = 379 = log2(total_count) - log2(count(symbol)) */ 380 381 var depth uint = uint(log2p + 0.5) 382 /* Approximate the bit depth by round(-log2(P(symbol))) */ 383 bits += float64(histogram.data_[i]) * log2p 384 385 if depth > 15 { 386 depth = 15 387 } 388 389 if depth > max_depth { 390 max_depth = depth 391 } 392 393 depth_histo[depth]++ 394 i++ 395 } else { 396 var reps uint32 = 1 397 /* Compute the run length of zeros and add the appropriate number of 0 398 and 17 code length codes to the code length code histogram. */ 399 400 var k uint 401 for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { 402 reps++ 403 } 404 405 i += uint(reps) 406 if i == data_size { 407 /* Don't add any cost for the last zero run, since these are encoded 408 only implicitly. */ 409 break 410 } 411 412 if reps < 3 { 413 depth_histo[0] += reps 414 } else { 415 reps -= 2 416 for reps > 0 { 417 depth_histo[repeatZeroCodeLength]++ 418 419 /* Add the 3 extra bits for the 17 code length code. */ 420 bits += 3 421 422 reps >>= 3 423 } 424 } 425 } 426 } 427 428 /* Add the estimated encoding cost of the code length code histogram. */ 429 bits += float64(18 + 2*max_depth) 430 431 /* Add the entropy of the code length code histogram. */ 432 bits += bitsEntropy(depth_histo[:], codeLengthCodes) 433 } 434 435 return bits 436 }