github.com/bir3/gocompiler@v0.3.205/src/cmd/gocmd/compress/zstd/enc_best.go

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"bytes"
	"fmt"

	"github.com/bir3/gocompiler/src/cmd/gocmd/compress"
)

const (
	bestLongTableBits = 22                     // Bits used in the long match table
	bestLongTableSize = 1 << bestLongTableBits // Size of the table
	bestLongLen       = 8                      // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	bestShortTableBits = 18                      // Bits used in the short match table
	bestShortTableSize = 1 << bestShortTableBits // Size of the table
	bestShortLen       = 4                       // Bytes used for table hash

)

type match struct {
	offset int32
	s      int32
	length int32
	rep    int32
	est    int32
}

const highScore = maxMatchLen * 8

// estBits will estimate output bits from predefined tables.
func (m *match) estBits(bitsPerByte int32) {
	mlc := mlCode(uint32(m.length - zstdMinMatch))
	var ofc uint8
	if m.rep < 0 {
		ofc = ofCode(uint32(m.s-m.offset) + 3)
	} else {
		ofc = ofCode(uint32(m.rep))
	}
	// Cost of the offset and match length codes, excluding literals.
	ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]

	// Add cost of match encoding...
	m.est = int32(ofTT.outBits + mlTT.outBits)
	m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
	// Subtract savings compared to literal encoding...
	m.est -= (m.length * bitsPerByte) >> 10
	if m.est > 0 {
		// Unlikely gain..
		m.length = 0
		m.est = highScore
	}
}

// bestFastEncoder uses 2 tables, one for short matches (4 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type bestFastEncoder struct {
	fastBase
	table         [bestShortTableSize]prevEntry
	longTable     [bestLongTableSize]prevEntry
	dictTable     []prevEntry
	dictLongTable []prevEntry
}

// Encode improves compression...
func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (4)
		inputMargin            = 8 + 4
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [bestShortTableSize]prevEntry{}
			e.longTable = [bestLongTableSize]prevEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
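		// minOff is the oldest absolute offset still reachable within e.maxMatchOff.
		// Older entries are cleared; the rest are rebased so they remain valid once
		// e.cur is reset to e.maxMatchOff after both tables have been adjusted.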
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			v2 := e.table[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.table[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Use this to estimate literal cost.
	// Scaled by 10 bits.
	bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
	// Huffman can never go < 1 bit/byte
	if bitsPerByte < 1024 {
		bitsPerByte = 1024
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	const kSearchStrength = 10

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])
	offset3 := int32(blk.recentOffsets[2])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}

	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		if debugAsserts && canRepeat && offset1 == 0 {
			panic("offset0 was 0")
		}

		const goodEnough = 250

		cv := load6432(src, s)

		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
		candidateL := e.longTable[nextHashL]
		candidateS := e.table[nextHashS]

		// Set m to a match at offset if it looks like that will improve compression.
		improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
				return
			}
			if debugAsserts {
				if offset <= 0 {
					panic(offset)
				}
				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
				}
			}
			// Try to quick reject if we already have a long match.
			if m.length > 16 {
				left := len(src) - int(m.s+m.length)
				// If we are too close to the end, keep as is.
				if left <= 0 {
					return
				}
				checkLen := m.length - (s - m.s) - 8
				if left > 2 && checkLen > 4 {
					// Check 4 bytes, 4 bytes from the end of the current match.
					a := load3232(src, offset+checkLen)
					b := load3232(src, s+checkLen)
					if a != b {
						return
					}
				}
			}
			l := 4 + e.matchlen(s+4, offset+4, src)
			if rep < 0 {
				// Extend candidate match backwards as far as possible.
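				// tMin is the lowest position the match may extend back to:
				// at most maxMatchOff before the current s, clamped to zero.
				// s > nextEmit keeps bytes already emitted as literals out of
				// the extension, and l < maxMatchLength caps the total length.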
				tMin := s - e.maxMatchOff
				if tMin < 0 {
					tMin = 0
				}
				for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
					s--
					offset--
					l++
				}
			}

			cand := match{offset: offset, s: s, length: l, rep: rep}
			cand.estBits(bitsPerByte)
			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
				*m = cand
			}
		}

		best := match{s: s, est: highScore}
		improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
		improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
		improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
		improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)

		if canRepeat && best.length < goodEnough {
			if s == nextEmit {
				// Check repeats straight after a match.
				improve(&best, s-offset2, s, uint32(cv), 1|4)
				improve(&best, s-offset3, s, uint32(cv), 2|4)
				if offset1 > 1 {
					improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
				}
			}

			// If either no match or a non-repeat match, check at + 1
			if best.rep <= 0 {
				cv32 := uint32(cv >> 8)
				spp := s + 1
				improve(&best, spp-offset1, spp, cv32, 1)
				improve(&best, spp-offset2, spp, cv32, 2)
				improve(&best, spp-offset3, spp, cv32, 3)
				if best.rep < 0 {
					cv32 = uint32(cv >> 24)
					spp += 2
					improve(&best, spp-offset1, spp, cv32, 1)
					improve(&best, spp-offset2, spp, cv32, 2)
					improve(&best, spp-offset3, spp, cv32, 3)
				}
			}
		}
		// Load next and check...
		e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
		e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}

		// Look far ahead, unless we have a really long match already...
		if best.length < goodEnough {
			// No match found, move forward on input, no need to check forward...
			if best.length < 4 {
				s += 1 + (s-nextEmit)>>(kSearchStrength-1)
				if s >= sLimit {
					break encodeLoop
				}
				continue
			}

			candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
			cv = load6432(src, s+1)
			cv2 := load6432(src, s+2)
			candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

			// Short at s+1
			improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
			// Long at s+1, s+2
			improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
			improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
			improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
			improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
			if false {
				// Short at s+3.
				// Too often worse...
				improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
			}

			// Start check at a fixed offset to allow for a few mismatches.
			// For this compression level 2 yields the best results.
			// We cannot do this if we have already indexed this position.
			const skipBeginning = 2
			if best.s > s-skipBeginning {
				// See if we can find a better match by checking where the current best ends.
				// Use that offset to see if we can find a better full match.
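				// The long table is probed at the position just past the current
				// best match; candidateEnd.offset minus best.length (plus
				// skipBeginning) then gives an alternative starting offset for
				// the same stretch of input.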
				if sAt := best.s + best.length; sAt < sLimit {
					nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
					candidateEnd := e.longTable[nextHashL]

					if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
						improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
						if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
							improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
						}
					}
				}
			}
		}

		if debugAsserts {
			if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
				panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
			}
		}

		// We have a match, we can store the forward value
		if best.rep > 0 {
			var seq seq
			seq.matchLen = uint32(best.length - zstdMinMatch)
			if debugAsserts && s <= nextEmit {
				panic("s <= nextEmit")
			}
			addLiterals(&seq, best.s)

			// Repeat. If bit 4 is set, this is a non-lit repeat.
			seq.offset = uint32(best.rep & 3)
			if debugSequences {
				println("repeat sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Index old s + 1 -> s - 1
			index0 := s + 1
			s = best.s + best.length

			nextEmit = s
			if s >= sLimit {
				if debugEncoder {
					println("repeat ended", s, best.length)
				}
				break encodeLoop
			}
			// Index skipped...
			off := index0 + e.cur
			for index0 < s {
				cv0 := load6432(src, index0)
				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
				e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
				off++
				index0++
			}
			switch best.rep {
			case 2, 4 | 1:
				offset1, offset2 = offset2, offset1
			case 3, 4 | 2:
				offset1, offset2, offset3 = offset3, offset1, offset2
			case 4 | 3:
				offset1, offset2, offset3 = offset1-1, offset1, offset2
			}
			continue
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		index0 := s + 1
		s = best.s
		t := best.offset
		offset1, offset2, offset3 = s-t, offset1, offset2

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Write our sequence
		var seq seq
		l := best.length
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index old s + 1 -> s - 1
		for index0 < s {
			cv0 := load6432(src, index0)
			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
			index0++
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
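		// extraLits records the trailing literals that are not covered by any sequence.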
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	blk.recentOffsets[2] = uint32(offset3)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Reset will reset and set a dictionary if not nil
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]prevEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = bestShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, bestShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, bestShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
			e.dictTable[nextHash] = prevEntry{
				prev:   e.dictTable[nextHash].offset,
				offset: i,
			}
			e.dictTable[nextHash1] = prevEntry{
				prev:   e.dictTable[nextHash1].offset,
				offset: i + 1,
			}
			e.dictTable[nextHash2] = prevEntry{
				prev:   e.dictTable[nextHash2].offset,
				offset: i + 2,
			}
			e.dictTable[nextHash3] = prevEntry{
				prev:   e.dictTable[nextHash3].offset,
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, bestLongTableBits, bestLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, bestLongTableBits, bestLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
	}
	// Reset table to initial state
	copy(e.longTable[:], e.dictLongTable)

	e.cur = e.maxMatchOff
	// Reset table to initial state
	copy(e.table[:], e.dictTable)
}