// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import (
	"fmt"
	"io"
	"math"
)

const (
	NoCompression      = 0
	BestSpeed          = 1
	fastCompression    = 3
	BestCompression    = 9
	DefaultCompression = -1
	logWindowSize      = 15
	windowSize         = 1 << logWindowSize
	windowMask         = windowSize - 1
	logMaxOffsetSize   = 15  // Standard DEFLATE
	minMatchLength     = 3   // The smallest match that the compressor looks for
	maxMatchLength     = 258 // The longest match for the compressor
	minOffsetSize      = 1   // The shortest offset that makes any sense

	// The maximum number of tokens we put into a single flate block, just to
	// stop things from getting too large.
	maxFlateBlockTokens = 1 << 14
	maxStoreBlockSize   = 65535
	hashBits            = 17
	hashSize            = 1 << hashBits
	hashMask            = (1 << hashBits) - 1
	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
	maxHashOffset       = 1 << 24

	skipNever = math.MaxInt32
)

type compressionLevel struct {
	good, lazy, nice, chain, fastSkipHashing int
}

var levels = []compressionLevel{
	{}, // 0
	// For levels 1-3 we don't bother trying with lazy matches
	{3, 0, 8, 4, 4},
	{3, 0, 16, 8, 5},
	{3, 0, 32, 32, 6},
	// Levels 4-9 use increasingly more lazy matching
	// and increasingly stringent conditions for "good enough".
	{4, 4, 16, 16, skipNever},
	{8, 16, 32, 32, skipNever},
	{8, 16, 128, 128, skipNever},
	{8, 32, 128, 256, skipNever},
	{32, 128, 258, 1024, skipNever},
	{32, 258, 258, 4096, skipNever},
}

type compressor struct {
	compressionLevel

	w *huffmanBitWriter

	// compression algorithm
	fill func(*compressor, []byte) int // copy data to window
	step func(*compressor)             // process window
	sync bool                          // requesting flush

	// Input hash chains
	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
	// If hashHead[hashValue] is within the current window, then
	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
	// with the same hash value.
	chainHead  int
	hashHead   []int
	hashPrev   []int
	hashOffset int

	// input window: unprocessed data is window[index:windowEnd]
	index         int
	window        []byte
	windowEnd     int
	blockStart    int  // window index where current tokens start
	byteAvailable bool // if true, still need to process window[index-1].

	// queued output tokens
	tokens []token

	// deflate state
	length         int
	offset         int
	hash           int
	maxInsertIndex int
	err            error
}
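// The hash-chain fields above are easiest to understand as a bucketed,
// ring-buffer-backed linked list. The sketch below is illustrative only
// (chainPositions is a hypothetical helper, not used elsewhere in this
// file): it walks the chain for one hash value, newest position first,
// which is the same traversal that findMatch inlines with extra cutoffs.
func (d *compressor) chainPositions(hashValue, max int) []int {
	var positions []int
	// hashHead and hashPrev store index+hashOffset so that a stored zero
	// can mean "no entry".
	for h := d.hashHead[hashValue]; h > 0 && len(positions) < max; {
		pos := h - d.hashOffset
		if pos < 0 {
			break // entry predates the data remaining in the window
		}
		positions = append(positions, pos)
		// Entries more than windowSize behind the newest insert have been
		// overwritten in hashPrev; findMatch guards against that with its
		// minIndex cutoff, omitted here for brevity.
		h = d.hashPrev[pos&windowMask]
	}
	return positions
}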
func (d *compressor) fillDeflate(b []byte) int {
	if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
		// shift the window by windowSize
		copy(d.window, d.window[windowSize:2*windowSize])
		d.index -= windowSize
		d.windowEnd -= windowSize
		if d.blockStart >= windowSize {
			d.blockStart -= windowSize
		} else {
			d.blockStart = math.MaxInt32
		}
		d.hashOffset += windowSize
		if d.hashOffset > maxHashOffset {
			delta := d.hashOffset - 1
			d.hashOffset -= delta
			d.chainHead -= delta
			for i, v := range d.hashPrev {
				if v > delta {
					d.hashPrev[i] -= delta
				} else {
					d.hashPrev[i] = 0
				}
			}
			for i, v := range d.hashHead {
				if v > delta {
					d.hashHead[i] -= delta
				} else {
					d.hashHead[i] = 0
				}
			}
		}
	}
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}

func (d *compressor) writeBlock(tokens []token, index int, eof bool) error {
	if index > 0 || eof {
		var window []byte
		if d.blockStart <= index {
			window = d.window[d.blockStart:index]
		}
		d.blockStart = index
		d.w.writeBlock(tokens, eof, window)
		return d.w.err
	}
	return nil
}

// Try to find a match starting at pos whose length is greater than prevLength.
// We only look at d.chain possibilities before giving up.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
	minMatchLook := maxMatchLength
	if lookahead < minMatchLook {
		minMatchLook = lookahead
	}

	win := d.window[0 : pos+minMatchLook]

	// We quit when we get a match that's at least nice long
	nice := len(win) - pos
	if d.nice < nice {
		nice = d.nice
	}

	// If we've got a match that's good enough, only look in 1/4 the chain.
	tries := d.chain
	length = prevLength
	if length >= d.good {
		tries >>= 2
	}

	w0 := win[pos]
	w1 := win[pos+1]
	wEnd := win[pos+length]
	minIndex := pos - windowSize

	for i := prevHead; tries > 0; tries-- {
		if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
			// The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches

			n := 3
			for pos+n < len(win) && win[i+n] == win[pos+n] {
				n++
			}
			if n > length && (n > 3 || pos-i <= 4096) {
				length = n
				offset = pos - i
				ok = true
				if n >= nice {
					// The match is good enough that we don't try to find a better one.
					break
				}
				wEnd = win[pos+n]
			}
		}
		if i == minIndex {
			// hashPrev[i & windowMask] has already been overwritten, so stop now.
			break
		}
		if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 {
			break
		}
	}
	return
}
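// The inner loop of findMatch above fuses candidate filtering (w0, w1, wEnd)
// with length measurement for speed. As a minimal restatement of just the
// measurement step, the hypothetical helper below counts how many bytes two
// window positions have in common; it is not called by the code above and
// assumes i < pos, as findMatch guarantees.
func matchLen(win []byte, i, pos, max int) int {
	n := 0
	for n < max && pos+n < len(win) && win[i+n] == win[pos+n] {
		n++
	}
	return n
}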
func (d *compressor) writeStoredBlock(buf []byte) error {
	if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
		return d.w.err
	}
	d.w.writeBytes(buf)
	return d.w.err
}

func (d *compressor) initDeflate() {
	d.hashHead = make([]int, hashSize)
	d.hashPrev = make([]int, windowSize)
	d.window = make([]byte, 2*windowSize)
	d.hashOffset = 1
	d.tokens = make([]token, 0, maxFlateBlockTokens+1)
	d.length = minMatchLength - 1
	d.offset = 0
	d.byteAvailable = false
	d.index = 0
	d.hash = 0
	d.chainHead = -1
}

func (d *compressor) deflate() {
	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
		return
	}

	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
	if d.index < d.maxInsertIndex {
		d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
	}

Loop:
	for {
		if d.index > d.windowEnd {
			panic("index > windowEnd")
		}
		lookahead := d.windowEnd - d.index
		if lookahead < minMatchLength+maxMatchLength {
			if !d.sync {
				break Loop
			}
			if d.index > d.windowEnd {
				panic("index > windowEnd")
			}
			if lookahead == 0 {
				// Flush current output block if any.
				if d.byteAvailable {
					// There is still one pending token that needs to be flushed
					d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
					d.byteAvailable = false
				}
				if len(d.tokens) > 0 {
					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
				break Loop
			}
		}
		if d.index < d.maxInsertIndex {
			// Update the hash
			d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
			d.chainHead = d.hashHead[d.hash]
			d.hashPrev[d.index&windowMask] = d.chainHead
			d.hashHead[d.hash] = d.index + d.hashOffset
		}
		prevLength := d.length
		prevOffset := d.offset
		d.length = minMatchLength - 1
		d.offset = 0
		minIndex := d.index - windowSize
		if minIndex < 0 {
			minIndex = 0
		}

		if d.chainHead-d.hashOffset >= minIndex &&
			(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
				d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
				d.length = newLength
				d.offset = newOffset
			}
		}
		if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
			d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
			// There was a match at the previous step, and the current match is
			// not better. Output the previous match.
			if d.fastSkipHashing != skipNever {
				d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)))
			} else {
				d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)))
			}
			// Insert in the hash table all strings up to the end of the match.
			// index and index-1 are already inserted. If there is not enough
			// lookahead, the last two strings are not inserted into the hash
			// table.
			if d.length <= d.fastSkipHashing {
				var newIndex int
				if d.fastSkipHashing != skipNever {
					newIndex = d.index + d.length
				} else {
					newIndex = d.index + prevLength - 1
				}
				for d.index++; d.index < newIndex; d.index++ {
					if d.index < d.maxInsertIndex {
						d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
						// Get previous value with the same hash.
						// Our chain should point to the previous value.
						d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
						// Set the head of the hash chain to us.
						d.hashHead[d.hash] = d.index + d.hashOffset
					}
				}
				if d.fastSkipHashing == skipNever {
					d.byteAvailable = false
					d.length = minMatchLength - 1
				}
			} else {
				// For matches this long, we don't bother inserting each individual
				// item into the table.
				d.index += d.length
				if d.index < d.maxInsertIndex {
					d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
				}
			}
			if len(d.tokens) == maxFlateBlockTokens {
				// The block includes the current character
				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
					return
				}
				d.tokens = d.tokens[:0]
			}
		} else {
			if d.fastSkipHashing != skipNever || d.byteAvailable {
				i := d.index - 1
				if d.fastSkipHashing != skipNever {
					i = d.index
				}
				d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
				if len(d.tokens) == maxFlateBlockTokens {
					if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
			}
			d.index++
			if d.fastSkipHashing == skipNever {
				d.byteAvailable = true
			}
		}
	}
}
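// deflate above interleaves two strategies. With fastSkipHashing == skipNever
// (levels 4-9) it matches lazily: a match found at index is held back for one
// byte and emitted only if the match starting at index+1 is no longer. The
// hypothetical helper below restates that emit decision in isolation; it is
// not called by the code above, which additionally caps lazy evaluation at
// d.lazy.
func emitHeldMatch(prevLength, curLength int) bool {
	// Emit the held-back match when the new candidate fails to beat it.
	return prevLength >= minMatchLength && curLength <= prevLength
}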
func (d *compressor) fillStore(b []byte) int {
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}

func (d *compressor) store() {
	if d.windowEnd > 0 {
		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
	}
	d.windowEnd = 0
}

func (d *compressor) write(b []byte) (n int, err error) {
	n = len(b)
	b = b[d.fill(d, b):]
	for len(b) > 0 {
		d.step(d)
		b = b[d.fill(d, b):]
	}
	return n, d.err
}

func (d *compressor) syncFlush() error {
	d.sync = true
	d.step(d)
	if d.err == nil {
		d.w.writeStoredHeader(0, false)
		d.w.flush()
		d.err = d.w.err
	}
	d.sync = false
	return d.err
}

func (d *compressor) init(w io.Writer, level int) (err error) {
	d.w = newHuffmanBitWriter(w)

	switch {
	case level == NoCompression:
		d.window = make([]byte, maxStoreBlockSize)
		d.fill = (*compressor).fillStore
		d.step = (*compressor).store
	case level == DefaultCompression:
		level = 6
		fallthrough
	case 1 <= level && level <= 9:
		d.compressionLevel = levels[level]
		d.initDeflate()
		d.fill = (*compressor).fillDeflate
		d.step = (*compressor).deflate
	default:
		return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level)
	}
	return nil
}

var zeroes [32]int
var bzeroes [256]byte

func (d *compressor) reset(w io.Writer) {
	d.w.reset(w)
	d.sync = false
	d.err = nil
	switch d.compressionLevel.chain {
	case 0:
		// level was NoCompression.
		for i := range d.window {
			d.window[i] = 0
		}
		d.windowEnd = 0
	default:
		d.chainHead = -1
		for s := d.hashHead; len(s) > 0; {
			n := copy(s, zeroes[:])
			s = s[n:]
		}
		for s := d.hashPrev; len(s) > 0; s = s[len(zeroes):] {
			copy(s, zeroes[:])
		}
		d.hashOffset = 1

		d.index, d.windowEnd = 0, 0
		for s := d.window; len(s) > 0; {
			n := copy(s, bzeroes[:])
			s = s[n:]
		}
		d.blockStart, d.byteAvailable = 0, false

		d.tokens = d.tokens[:maxFlateBlockTokens+1]
		for i := 0; i <= maxFlateBlockTokens; i++ {
			d.tokens[i] = 0
		}
		d.tokens = d.tokens[:0]
		d.length = minMatchLength - 1
		d.offset = 0
		d.hash = 0
		d.maxInsertIndex = 0
	}
}
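// reset exists so a Writer can be reused without reallocating its large
// window and hash-table state. A minimal sketch of the intended pooling
// pattern, assuming a caller-side sync.Pool (the names writers and compress
// are hypothetical, not part of this package):
//
//	var writers = sync.Pool{
//		New: func() interface{} {
//			zw, _ := NewWriter(nil, DefaultCompression)
//			return zw
//		},
//	}
//
//	func compress(dst io.Writer, data []byte) error {
//		zw := writers.Get().(*Writer)
//		defer writers.Put(zw)
//		zw.Reset(dst)
//		if _, err := zw.Write(data); err != nil {
//			return err
//		}
//		return zw.Close()
//	}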
func (d *compressor) close() error {
	d.sync = true
	d.step(d)
	if d.err != nil {
		return d.err
	}
	if d.w.writeStoredHeader(0, true); d.w.err != nil {
		return d.w.err
	}
	d.w.flush()
	return d.w.err
}

// NewWriter returns a new Writer compressing data at the given level.
// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
// higher levels typically run slower but compress more. Level 0
// (NoCompression) does not attempt any compression; it only adds the
// necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default
// compression level.
//
// If level is in the range [-1, 9] then the error returned will be nil.
// Otherwise the error returned will be non-nil.
func NewWriter(w io.Writer, level int) (*Writer, error) {
	var dw Writer
	if err := dw.d.init(w, level); err != nil {
		return nil, err
	}
	return &dw, nil
}

// NewWriterDict is like NewWriter but initializes the new
// Writer with a preset dictionary. The returned Writer behaves
// as if the dictionary had been written to it without producing
// any compressed output. The compressed data written to w
// can only be decompressed by a Reader initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
	dw := &dictWriter{w, false}
	zw, err := NewWriter(dw, level)
	if err != nil {
		return nil, err
	}
	zw.Write(dict)
	zw.Flush()
	dw.enabled = true
	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
	return zw, err
}

type dictWriter struct {
	w       io.Writer
	enabled bool
}

func (w *dictWriter) Write(b []byte) (n int, err error) {
	if w.enabled {
		return w.w.Write(b)
	}
	return len(b), nil
}

// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
	d    compressor
	dict []byte
}

// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err error) {
	return w.d.write(data)
}

// Flush flushes any pending compressed data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
	// For more about flushing:
	// http://www.bolet.org/~pornin/deflate-flush.html
	return w.d.syncFlush()
}
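// Flush is what makes this Writer usable for interactive streams: after a
// Flush, the compressed bytes emitted so far decode to exactly the bytes
// written so far. A minimal sketch of per-message framing over a connection
// (sendMessage is a hypothetical caller-side helper):
//
//	func sendMessage(zw *Writer, msg []byte) error {
//		if _, err := zw.Write(msg); err != nil {
//			return err
//		}
//		// The peer can now decode msg without waiting for Close.
//		return zw.Flush()
//	}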
// Close flushes and closes the writer.
func (w *Writer) Close() error {
	return w.d.close()
}

// Reset discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
	if dw, ok := w.d.w.w.(*dictWriter); ok {
		// w was created with NewWriterDict
		dw.w = dst
		w.d.reset(dw)
		dw.enabled = false
		w.Write(w.dict)
		w.Flush()
		dw.enabled = true
	} else {
		// w was created with NewWriter
		w.d.reset(dst)
	}
}
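// A minimal end-to-end sketch of this package's write-side API, assuming an
// in-memory roundtrip with NewReader from this package (error handling
// elided for brevity):
//
//	var buf bytes.Buffer
//	zw, _ := NewWriter(&buf, BestCompression)
//	zw.Write([]byte("hello, hello, hello"))
//	zw.Close()
//
//	rc := NewReader(&buf)
//	decompressed, _ := ioutil.ReadAll(rc)
//	rc.Close()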