github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/enc_better.go

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table
	betterLongLen       = 8                        // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table
	betterShortLen       = 5                         // Bytes used for table hash

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode improves compression...
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [betterShortTableSize]tableEntry{}
			e.longTable = [betterLongTableSize]prevEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
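		// Entries more than e.maxMatchOff bytes behind the end of the history are dropped;
		// the rest are rebased so they remain valid after e.cur is reset to e.maxMatchOff below.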
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}
					// Index skipped...
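					// Positions inside the repeat match are hashed back into the tables
					// (long table at index0, short table at index0+1), stepping two bytes at a time.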
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			// Allow some bytes at the beginning to mismatch.
			// Sweet spot is around 3 bytes, but depends on input.
			// The skipped bytes are tested in Extend backwards,
			// and still picked up as part of the match if they do.
			const skipBeginning = 3

			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			s2 := s + skipBeginning
			cv := load3232(src, s2)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched + skipBeginning
			if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					s = s2
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched + skipBeginning
				if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						s = s2
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
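// For this encoder it only ensures the history buffer has room for src and then calls Encode.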
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode improves compression...
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
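			// Both the most recent long-table entry and its chained predecessor are candidates;
			// the longer match of the two is kept.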
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
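			// The step grows with the distance since the last emitted position, so stretches
			// without matches are skipped progressively faster (controlled by kSearchStrength).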
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match
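			// Determine the full match length at offset 2 before emitting the sequence.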
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset and set a dictionary if not nil
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, betterLongTableBits, betterLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, betterLongTableBits, betterLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}