github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/enc_dfast.go

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	dFastLongTableBits = 17                      // Bits used in the long match table
	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	dFastLongLen       = 8                       // Bytes used for table hash

	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
	dLongTableShardSize = dFastLongTableSize / tableShardCnt        // Size of an individual shard

	dFastShortTableBits = tableBits                // Bits used in the short match table
	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	dFastShortLen       = 5                        // Bytes used for table hash
)

type doubleFastEncoder struct {
	fastEncoder
	longTable [dFastLongTableSize]tableEntry
}

type doubleFastEncoderDict struct {
	fastEncoderDict
	longTable           [dFastLongTableSize]tableEntry
	dictLongTable       []tableEntry
	longTableShardDirty [dLongTableShardCnt]bool
}

// Encode mimics functionality in zstd_dfast.c
func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2).
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [dFastShortTableSize]tableEntry{}
			e.longTable = [dFastLongTableSize]tableEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
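		// Record the emitted literal count on the sequence itself.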
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
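					// Prefer the long match found one byte ahead; s is advanced by checkAt below.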
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

			// We have at least a 4 byte match.
			// No need to check backwards; we come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
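			// The repeat used offset2, which now becomes the most recent offset.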
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2).
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	if e.cur >= e.bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		for i := range e.longTable[:] {
			e.longTable[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		for {
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if len(blk.sequences) > 2 {
				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
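					// Stopping at nextEmit+1 keeps at least one literal in the sequence.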
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		// Extend the 4-byte match as long as possible.
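		// Without stored history the block is self-contained, so plain matchLen needs no max-match-length cap.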
		//l := e.matchlen(s+4, t+4, src) + 4
		l := int32(matchLen(src[s+4:], src[t+4:])) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

		cv = load6432(src, s)

		if len(blk.sequences) <= 2 {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

			// We have at least a 4 byte match.
			// No need to check backwards; we come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}

	// We do not store history, so we must offset e.cur to avoid false matches for the next user.
	if e.cur < e.bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2).
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
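	// Either clear both tables (no history) or rebase every stored offset so it stays valid.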
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.markAllShardsDirty()
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.markAllShardsDirty()
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
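					// As in Encode above, stop at nextEmit+1 so the sequence keeps a literal.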
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				e.markLongShardDirty(nextHashL)
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
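		// The first 4 bytes already matched; count how far the match extends past them.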
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
		longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
		e.longTable[longHash1] = te0
		e.longTable[longHash2] = te1
		e.markLongShardDirty(longHash1)
		e.markLongShardDirty(longHash2)
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
		hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
		e.table[hashVal1] = te0
		e.markShardDirty(hashVal1)
		e.table[hashVal2] = te1
		e.markShardDirty(hashVal2)

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)

			// We have at least a 4 byte match.
			// No need to check backwards; we come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// If we encoded more than 64K, mark all shards dirty.
	if len(src) > 64<<10 {
		e.markAllShardsDirty()
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
	e.fastEncoder.Reset(d, singleBlock)
	if d != nil {
		panic("doubleFastEncoder: Reset with dict not supported")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
	allDirty := e.allDirty
	e.fastEncoderDict.Reset(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]tableEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
				val:    uint32(cv),
				offset: e.maxMatchOff,
			}
			end := int32(len(d.content)) - 8 + e.maxMatchOff
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
				e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}
	// Reset table to initial state
	e.cur = e.maxMatchOff

	dirtyShardCnt := 0
	if !allDirty {
		for i := range e.longTableShardDirty {
			if e.longTableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
		//copy(e.longTable[:], e.dictLongTable)
		e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
		for i := range e.longTableShardDirty {
			e.longTableShardDirty[i] = false
		}
		return
	}
	for i := range e.longTableShardDirty {
		if !e.longTableShardDirty[i] {
			continue
		}

		// copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
		*(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])

		e.longTableShardDirty[i] = false
	}
}

func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}