github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/compress/flate/deflate.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import (
	"fmt"
	"io"
	"math"
)

const (
	NoCompression      = 0
	BestSpeed          = 1
	BestCompression    = 9
	DefaultCompression = -1
	HuffmanOnly        = -2 // Disables match search and only does Huffman entropy reduction.
	logWindowSize      = 15
	windowSize         = 1 << logWindowSize
	windowMask         = windowSize - 1
	logMaxOffsetSize   = 15  // Standard DEFLATE
	minMatchLength     = 4   // The smallest match that the compressor looks for
	maxMatchLength     = 258 // The longest match for the compressor
	minOffsetSize      = 1   // The shortest offset that makes any sense

	// The maximum number of tokens we put into a single flate block, just to
	// stop things from getting too large.
	maxFlateBlockTokens = 1 << 14
	maxStoreBlockSize   = 65535
	hashBits            = 17 // After 17 performance degrades
	hashSize            = 1 << hashBits
	hashMask            = (1 << hashBits) - 1
	maxHashOffset       = 1 << 24

	skipNever = math.MaxInt32
)

type compressionLevel struct {
	level, good, lazy, nice, chain, fastSkipHashing int
}

var levels = []compressionLevel{
	{}, // 0
	// For levels 1-3 we don't bother trying with lazy matches
	{1, 4, 0, 8, 4, 4},
	{2, 4, 0, 16, 8, 5},
	{3, 4, 0, 32, 32, 6},
	// Levels 4-9 use increasingly more lazy matching
	// and increasingly stringent conditions for "good enough".
	{4, 4, 4, 16, 16, skipNever},
	{5, 8, 16, 32, 32, skipNever},
	{6, 8, 16, 128, 128, skipNever},
	{7, 8, 32, 128, 256, skipNever},
	{8, 32, 128, 258, 1024, skipNever},
	{9, 32, 258, 258, 4096, skipNever},
}

type compressor struct {
	compressionLevel

	w          *huffmanBitWriter
	bulkHasher func([]byte, []uint32)

	// compression algorithm
	fill func(*compressor, []byte) int // copy data to window
	step func(*compressor)             // process window
	sync bool                          // requesting flush

	// Input hash chains
	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
	// If hashHead[hashValue] is within the current window, then
	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
	// with the same hash value.
	chainHead  int
	hashHead   []uint32
	hashPrev   []uint32
	hashOffset int

	// input window: unprocessed data is window[index:windowEnd]
	index         int
	window        []byte
	windowEnd     int
	blockStart    int  // window index where current tokens start
	byteAvailable bool // if true, still need to process window[index-1].

	// queued output tokens
	tokens []token

	// deflate state
	length         int
	offset         int
	hash           uint32
	maxInsertIndex int
	err            error

	// hashMatch must be able to contain hashes for the maximum match length.
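	// It is used as scratch space by fillWindow when bulk-hashing a preset dictionary.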
	hashMatch [maxMatchLength - 1]uint32
}

func (d *compressor) fillDeflate(b []byte) int {
	if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
		// shift the window by windowSize
		copy(d.window, d.window[windowSize:2*windowSize])
		d.index -= windowSize
		d.windowEnd -= windowSize
		if d.blockStart >= windowSize {
			d.blockStart -= windowSize
		} else {
			d.blockStart = math.MaxInt32
		}
		d.hashOffset += windowSize
		if d.hashOffset > maxHashOffset {
			delta := d.hashOffset - 1
			d.hashOffset -= delta
			d.chainHead -= delta
			for i, v := range d.hashPrev {
				if int(v) > delta {
					d.hashPrev[i] = uint32(int(v) - delta)
				} else {
					d.hashPrev[i] = 0
				}
			}
			for i, v := range d.hashHead {
				if int(v) > delta {
					d.hashHead[i] = uint32(int(v) - delta)
				} else {
					d.hashHead[i] = 0
				}
			}
		}
	}
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}

func (d *compressor) writeBlock(tokens []token, index int) error {
	if index > 0 {
		var window []byte
		if d.blockStart <= index {
			window = d.window[d.blockStart:index]
		}
		d.blockStart = index
		d.w.writeBlock(tokens, false, window)
		return d.w.err
	}
	return nil
}

// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a reset.
func (d *compressor) fillWindow(b []byte) {
	// Do not fill window if we are in store-only mode.
	if d.compressionLevel.level == 0 {
		return
	}
	if d.index != 0 || d.windowEnd != 0 {
		panic("internal error: fillWindow called with stale data")
	}

	// If we are given too much, cut it.
	if len(b) > windowSize {
		b = b[len(b)-windowSize:]
	}
	// Add all to window.
	n := copy(d.window, b)

	// Calculate 256 hashes at a time (more L1 cache hits).
	loops := (n + 256 - minMatchLength) / 256
	for j := 0; j < loops; j++ {
		index := j * 256
		end := index + 256 + minMatchLength - 1
		if end > n {
			end = n
		}
		toCheck := d.window[index:end]
		dstSize := len(toCheck) - minMatchLength + 1

		if dstSize <= 0 {
			continue
		}

		dst := d.hashMatch[:dstSize]
		d.bulkHasher(toCheck, dst)
		var newH uint32
		for i, val := range dst {
			di := i + index
			newH = val & hashMask
			// Get previous value with the same hash.
			// Our chain should point to the previous value.
			d.hashPrev[di&windowMask] = d.hashHead[newH]
			// Set the head of the hash chain to us.
			d.hashHead[newH] = uint32(di + d.hashOffset)
		}
		d.hash = newH
	}
	// Update window information.
	d.windowEnd = n
	d.index = n
}

// findMatch tries to find a match starting at pos whose length is greater
// than prevLength. We only look at d.chain possibilities before giving up.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
	minMatchLook := maxMatchLength
	if lookahead < minMatchLook {
		minMatchLook = lookahead
	}

	win := d.window[0 : pos+minMatchLook]

	// We quit when we get a match that's at least nice long
	nice := len(win) - pos
	if d.nice < nice {
		nice = d.nice
	}

	// If we've got a match that's good enough, only look in 1/4 the chain.
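	// A previous match of at least d.good bytes is unlikely to be beaten,
	// so the chain budget is quartered below.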
	tries := d.chain
	length = prevLength
	if length >= d.good {
		tries >>= 2
	}

	wEnd := win[pos+length]
	wPos := win[pos:]
	minIndex := pos - windowSize

	for i := prevHead; tries > 0; tries-- {
		if wEnd == win[i+length] {
			n := matchLen(win[i:], wPos, minMatchLook)

			if n > length && (n > minMatchLength || pos-i <= 4096) {
				length = n
				offset = pos - i
				ok = true
				if n >= nice {
					// The match is good enough that we don't try to find a better one.
					break
				}
				wEnd = win[pos+n]
			}
		}
		if i == minIndex {
			// hashPrev[i & windowMask] has already been overwritten, so stop now.
			break
		}
		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
		if i < minIndex || i < 0 {
			break
		}
	}
	return
}

func (d *compressor) writeStoredBlock(buf []byte) error {
	if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
		return d.w.err
	}
	d.w.writeBytes(buf)
	return d.w.err
}

const hashmul = 0x1e35a7bd

// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
	return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
}

// bulkHash4 will compute hashes using the same
// algorithm as hash4.
func bulkHash4(b []byte, dst []uint32) {
	if len(b) < minMatchLength {
		return
	}
	hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
	dst[0] = (hb * hashmul) >> (32 - hashBits)
	end := len(b) - minMatchLength + 1
	for i := 1; i < end; i++ {
		hb = (hb << 8) | uint32(b[i+3])
		dst[i] = (hb * hashmul) >> (32 - hashBits)
	}
}

// matchLen returns the number of matching bytes in a and b
// up to length 'max'. Both slices must be at least 'max'
// bytes in size.
func matchLen(a, b []byte, max int) int {
	a = a[:max]
	for i, av := range a {
		if b[i] != av {
			return i
		}
	}
	return max
}

func (d *compressor) initDeflate() {
	d.hashHead = make([]uint32, hashSize)
	d.hashPrev = make([]uint32, windowSize)
	d.window = make([]byte, 2*windowSize)
	d.hashOffset = 1
	d.tokens = make([]token, 0, maxFlateBlockTokens+1)
	d.length = minMatchLength - 1
	d.offset = 0
	d.byteAvailable = false
	d.index = 0
	d.hash = 0
	d.chainHead = -1
	d.bulkHasher = bulkHash4
}

func (d *compressor) deflate() {
	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
		return
	}

	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
	if d.index < d.maxInsertIndex {
		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
	}

Loop:
	for {
		if d.index > d.windowEnd {
			panic("index > windowEnd")
		}
		lookahead := d.windowEnd - d.index
		if lookahead < minMatchLength+maxMatchLength {
			if !d.sync {
				break Loop
			}
			if d.index > d.windowEnd {
				panic("index > windowEnd")
			}
			if lookahead == 0 {
				// Flush current output block if any.
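				// byteAvailable means lazy matching deferred the literal at
				// index-1; it has to be emitted before the block is flushed.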
				if d.byteAvailable {
					// There is still one pending token that needs to be flushed
					d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
					d.byteAvailable = false
				}
				if len(d.tokens) > 0 {
					if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
				break Loop
			}
		}
		if d.index < d.maxInsertIndex {
			// Update the hash
			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
			d.chainHead = int(d.hashHead[d.hash])
			d.hashPrev[d.index&windowMask] = uint32(d.chainHead)
			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
		}
		prevLength := d.length
		prevOffset := d.offset
		d.length = minMatchLength - 1
		d.offset = 0
		minIndex := d.index - windowSize
		if minIndex < 0 {
			minIndex = 0
		}

		if d.chainHead-d.hashOffset >= minIndex &&
			(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
				d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
				d.length = newLength
				d.offset = newOffset
			}
		}
		if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
			d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
			// There was a match at the previous step, and the current match is
			// not better. Output the previous match.
			if d.fastSkipHashing != skipNever {
				d.tokens = append(d.tokens, matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize)))
			} else {
				d.tokens = append(d.tokens, matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)))
			}
			// Insert in the hash table all strings up to the end of the match.
			// index and index-1 are already inserted. If there is not enough
			// lookahead, the last two strings are not inserted into the hash
			// table.
			if d.length <= d.fastSkipHashing {
				var newIndex int
				if d.fastSkipHashing != skipNever {
					newIndex = d.index + d.length
				} else {
					newIndex = d.index + prevLength - 1
				}
				for d.index++; d.index < newIndex; d.index++ {
					if d.index < d.maxInsertIndex {
						d.hash = hash4(d.window[d.index : d.index+minMatchLength])
						// Get previous value with the same hash.
						// Our chain should point to the previous value.
						d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
						// Set the head of the hash chain to us.
						d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
					}
				}
				if d.fastSkipHashing == skipNever {
					d.byteAvailable = false
					d.length = minMatchLength - 1
				}
			} else {
				// For matches this long, we don't bother inserting each individual
				// item into the table.
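				// Skipping the per-position hash insertion trades a little
				// compression for speed; the skipped positions never enter the chains.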
				d.index += d.length
				if d.index < d.maxInsertIndex {
					d.hash = hash4(d.window[d.index : d.index+minMatchLength])
				}
			}
			if len(d.tokens) == maxFlateBlockTokens {
				// The block includes the current character
				if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
					return
				}
				d.tokens = d.tokens[:0]
			}
		} else {
			if d.fastSkipHashing != skipNever || d.byteAvailable {
				i := d.index - 1
				if d.fastSkipHashing != skipNever {
					i = d.index
				}
				d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
				if len(d.tokens) == maxFlateBlockTokens {
					if d.err = d.writeBlock(d.tokens, i+1); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
			}
			d.index++
			if d.fastSkipHashing == skipNever {
				d.byteAvailable = true
			}
		}
	}
}

func (d *compressor) fillStore(b []byte) int {
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}

func (d *compressor) store() {
	if d.windowEnd > 0 {
		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
	}
	d.windowEnd = 0
}

// storeHuff compresses and stores the currently added data
// when d.window is full or we are at the end of the stream.
// Any error that occurred will be in d.err.
func (d *compressor) storeHuff() {
	if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
		return
	}
	d.w.writeBlockHuff(false, d.window[:d.windowEnd])
	d.err = d.w.err
	d.windowEnd = 0
}

func (d *compressor) write(b []byte) (n int, err error) {
	if d.err != nil {
		return 0, d.err
	}
	n = len(b)
	for len(b) > 0 {
		d.step(d)
		b = b[d.fill(d, b):]
		if d.err != nil {
			return 0, d.err
		}
	}
	return n, nil
}

func (d *compressor) syncFlush() error {
	if d.err != nil {
		return d.err
	}
	d.sync = true
	d.step(d)
	if d.err == nil {
		d.w.writeStoredHeader(0, false)
		d.w.flush()
		d.err = d.w.err
	}
	d.sync = false
	return d.err
}

func (d *compressor) init(w io.Writer, level int) (err error) {
	d.w = newHuffmanBitWriter(w)

	switch {
	case level == NoCompression:
		d.window = make([]byte, maxStoreBlockSize)
		d.fill = (*compressor).fillStore
		d.step = (*compressor).store
	case level == HuffmanOnly:
		d.window = make([]byte, maxStoreBlockSize)
		d.fill = (*compressor).fillStore
		d.step = (*compressor).storeHuff
	case level == DefaultCompression:
		level = 6
		fallthrough
	case 1 <= level && level <= 9:
		d.compressionLevel = levels[level]
		d.initDeflate()
		d.fill = (*compressor).fillDeflate
		d.step = (*compressor).deflate
	default:
		return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
	}
	return nil
}

// hzeroes is used for zeroing the hash slice.
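// reset copies from it in 256-entry chunks rather than reallocating the tables.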
var hzeroes [256]uint32

func (d *compressor) reset(w io.Writer) {
	d.w.reset(w)
	d.sync = false
	d.err = nil
	switch d.compressionLevel.level {
	case NoCompression:
		d.windowEnd = 0
	default:
		d.chainHead = -1
		for s := d.hashHead; len(s) > 0; {
			n := copy(s, hzeroes[:])
			s = s[n:]
		}
		for s := d.hashPrev; len(s) > 0; s = s[len(hzeroes):] {
			copy(s, hzeroes[:])
		}
		d.hashOffset = 1

		d.index, d.windowEnd = 0, 0
		d.blockStart, d.byteAvailable = 0, false
		d.tokens = d.tokens[:0]
		d.length = minMatchLength - 1
		d.offset = 0
		d.hash = 0
		d.maxInsertIndex = 0
	}
}

func (d *compressor) close() error {
	if d.err != nil {
		return d.err
	}
	d.sync = true
	d.step(d)
	if d.err != nil {
		return d.err
	}
	if d.w.writeStoredHeader(0, true); d.w.err != nil {
		return d.w.err
	}
	d.w.flush()
	return d.w.err
}

// NewWriter returns a new Writer compressing data at the given level.
// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
// higher levels typically run slower but compress more. Level 0
// (NoCompression) does not attempt any compression; it only adds the
// necessary DEFLATE framing.
// Level -1 (DefaultCompression) uses the default compression level.
// Level -2 (HuffmanOnly) will use Huffman compression only, giving
// a very fast compression for all types of input, but sacrificing considerable
// compression efficiency.
//
// If level is in the range [-2, 9] then the error returned will be nil.
// Otherwise the error returned will be non-nil.
func NewWriter(w io.Writer, level int) (*Writer, error) {
	var dw Writer
	if err := dw.d.init(w, level); err != nil {
		return nil, err
	}
	return &dw, nil
}

// NewWriterDict is like NewWriter but initializes the new
// Writer with a preset dictionary. The returned Writer behaves
// as if the dictionary had been written to it without producing
// any compressed output. The compressed data written to w
// can only be decompressed by a Reader initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
	dw := &dictWriter{w}
	zw, err := NewWriter(dw, level)
	if err != nil {
		return nil, err
	}
	zw.d.fillWindow(dict)
	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
	return zw, err
}

type dictWriter struct {
	w io.Writer
}

func (w *dictWriter) Write(b []byte) (n int, err error) {
	return w.w.Write(b)
}

// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
	d    compressor
	dict []byte
}

// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err error) {
	return w.d.write(data)
}

// Flush flushes any pending compressed data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// If the underlying writer returns an error, Flush returns that error.
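// Calling Flush with no pending data still writes an empty stored block,
// which acts as the sync marker.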
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
	// For more about flushing:
	// http://www.bolet.org/~pornin/deflate-flush.html
	return w.d.syncFlush()
}

// Close flushes and closes the writer.
func (w *Writer) Close() error {
	return w.d.close()
}

// Reset discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
	if dw, ok := w.d.w.w.(*dictWriter); ok {
		// w was created with NewWriterDict
		dw.w = dst
		w.d.reset(dw)
		w.d.fillWindow(w.dict)
	} else {
		// w was created with NewWriter
		w.d.reset(dst)
	}
}
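
// Usage sketch (illustrative, not part of this file): writing through a
// flate.Writer and terminating the stream. The bytes.Buffer destination and
// the sample payload are assumptions made only for this example.
//
//	var buf bytes.Buffer
//	zw, err := NewWriter(&buf, BestSpeed)
//	if err != nil {
//		// only possible for a level outside [-2, 9]
//	}
//	zw.Write([]byte("hello, deflate"))
//	zw.Flush() // make everything written so far decodable (Z_SYNC_FLUSH)
//	zw.Close() // write the final block and flush the bit writer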