github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/compress/flate/inflate.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package flate implements the DEFLATE compressed data format, described in 6 // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file 7 // formats. 8 package flate 9 10 import ( 11 "bufio" 12 "io" 13 "strconv" 14 ) 15 16 const ( 17 maxCodeLen = 16 // max length of Huffman code 18 maxHist = 32768 // max history required 19 // The next three numbers come from the RFC, section 3.2.7. 20 maxLit = 286 21 maxDist = 32 22 numCodes = 19 // number of codes in Huffman meta-code 23 ) 24 25 // A CorruptInputError reports the presence of corrupt input at a given offset. 26 type CorruptInputError int64 27 28 func (e CorruptInputError) Error() string { 29 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) 30 } 31 32 // An InternalError reports an error in the flate code itself. 33 type InternalError string 34 35 func (e InternalError) Error() string { return "flate: internal error: " + string(e) } 36 37 // A ReadError reports an error encountered while reading input. 38 type ReadError struct { 39 Offset int64 // byte offset where error occurred 40 Err error // error returned by underlying Read 41 } 42 43 func (e *ReadError) Error() string { 44 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() 45 } 46 47 // A WriteError reports an error encountered while writing output. 48 type WriteError struct { 49 Offset int64 // byte offset where error occurred 50 Err error // error returned by underlying Write 51 } 52 53 func (e *WriteError) Error() string { 54 return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() 55 } 56 57 // Note that much of the implemenation of huffmanDecoder is also copied 58 // into gen.go (in package main) for the purpose of precomputing the 59 // fixed huffman tables so they can be included statically. 60 61 // The data structure for decoding Huffman tables is based on that of 62 // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), 63 // For codes smaller than the table width, there are multiple entries 64 // (each combination of trailing bits has the same value). For codes 65 // larger than the table width, the table contains a link to an overflow 66 // table. The width of each entry in the link table is the maximum code 67 // size minus the chunk width. 68 69 // Note that you can do a lookup in the table even without all bits 70 // filled. Since the extra bits are zero, and the DEFLATE Huffman codes 71 // have the property that shorter codes come before longer ones, the 72 // bit length estimate in the result is a lower bound on the actual 73 // number of bits. 74 75 // chunk & 15 is number of bits 76 // chunk >> 4 is value, including table link 77 78 const ( 79 huffmanChunkBits = 9 80 huffmanNumChunks = 1 << huffmanChunkBits 81 huffmanCountMask = 15 82 huffmanValueShift = 4 83 ) 84 85 type huffmanDecoder struct { 86 min int // the minimum code length 87 chunks [huffmanNumChunks]uint32 // chunks as described above 88 links [][]uint32 // overflow links 89 linkMask uint32 // mask the width of the link table 90 } 91 92 // Initialize Huffman decoding tables from array of code lengths. 93 func (h *huffmanDecoder) init(bits []int) bool { 94 // Count number of codes of each length, 95 // compute min and max length. 96 var count [maxCodeLen]int 97 var min, max int 98 for _, n := range bits { 99 if n == 0 { 100 continue 101 } 102 if min == 0 || n < min { 103 min = n 104 } 105 if n > max { 106 max = n 107 } 108 count[n]++ 109 } 110 if max == 0 { 111 return false 112 } 113 114 h.min = min 115 var linkBits uint 116 var numLinks int 117 if max > huffmanChunkBits { 118 linkBits = uint(max) - huffmanChunkBits 119 numLinks = 1 << linkBits 120 h.linkMask = uint32(numLinks - 1) 121 } 122 code := 0 123 var nextcode [maxCodeLen]int 124 for i := min; i <= max; i++ { 125 if i == huffmanChunkBits+1 { 126 // create link tables 127 link := code >> 1 128 h.links = make([][]uint32, huffmanNumChunks-link) 129 for j := uint(link); j < huffmanNumChunks; j++ { 130 reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 131 reverse >>= uint(16 - huffmanChunkBits) 132 off := j - uint(link) 133 h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i)) 134 h.links[off] = make([]uint32, 1<<linkBits) 135 } 136 } 137 n := count[i] 138 nextcode[i] = code 139 code += n 140 code <<= 1 141 } 142 143 for i, n := range bits { 144 if n == 0 { 145 continue 146 } 147 code := nextcode[n] 148 nextcode[n]++ 149 chunk := uint32(i<<huffmanValueShift | n) 150 reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8 151 reverse >>= uint(16 - n) 152 if n <= huffmanChunkBits { 153 for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { 154 h.chunks[off] = chunk 155 } 156 } else { 157 linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift] 158 reverse >>= huffmanChunkBits 159 for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { 160 linktab[off] = chunk 161 } 162 } 163 } 164 return true 165 } 166 167 // The actual read interface needed by NewReader. 168 // If the passed in io.Reader does not also have ReadByte, 169 // the NewReader will introduce its own buffering. 170 type Reader interface { 171 io.Reader 172 ReadByte() (c byte, err error) 173 } 174 175 // Decompress state. 176 type decompressor struct { 177 // Input source. 178 r Reader 179 roffset int64 180 woffset int64 181 182 // Input bits, in top of b. 183 b uint32 184 nb uint 185 186 // Huffman decoders for literal/length, distance. 187 h1, h2 huffmanDecoder 188 189 // Length arrays used to define Huffman codes. 190 bits *[maxLit + maxDist]int 191 codebits *[numCodes]int 192 193 // Output history, buffer. 194 hist *[maxHist]byte 195 hp int // current output position in buffer 196 hw int // have written hist[0:hw] already 197 hfull bool // buffer has filled at least once 198 199 // Temporary buffer (avoids repeated allocation). 200 buf [4]byte 201 202 // Next step in the decompression, 203 // and decompression state. 204 step func(*decompressor) 205 final bool 206 err error 207 toRead []byte 208 hl, hd *huffmanDecoder 209 copyLen int 210 copyDist int 211 } 212 213 func (f *decompressor) nextBlock() { 214 if f.final { 215 if f.hw != f.hp { 216 f.flush((*decompressor).nextBlock) 217 return 218 } 219 f.err = io.EOF 220 return 221 } 222 for f.nb < 1+2 { 223 if f.err = f.moreBits(); f.err != nil { 224 return 225 } 226 } 227 f.final = f.b&1 == 1 228 f.b >>= 1 229 typ := f.b & 3 230 f.b >>= 2 231 f.nb -= 1 + 2 232 switch typ { 233 case 0: 234 f.dataBlock() 235 case 1: 236 // compressed, fixed Huffman tables 237 f.hl = &fixedHuffmanDecoder 238 f.hd = nil 239 f.huffmanBlock() 240 case 2: 241 // compressed, dynamic Huffman tables 242 if f.err = f.readHuffman(); f.err != nil { 243 break 244 } 245 f.hl = &f.h1 246 f.hd = &f.h2 247 f.huffmanBlock() 248 default: 249 // 3 is reserved. 250 f.err = CorruptInputError(f.roffset) 251 } 252 } 253 254 func (f *decompressor) Read(b []byte) (int, error) { 255 for { 256 if len(f.toRead) > 0 { 257 n := copy(b, f.toRead) 258 f.toRead = f.toRead[n:] 259 return n, nil 260 } 261 if f.err != nil { 262 return 0, f.err 263 } 264 f.step(f) 265 } 266 } 267 268 func (f *decompressor) Close() error { 269 if f.err == io.EOF { 270 return nil 271 } 272 return f.err 273 } 274 275 // RFC 1951 section 3.2.7. 276 // Compression with dynamic Huffman codes 277 278 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 279 280 func (f *decompressor) readHuffman() error { 281 // HLIT[5], HDIST[5], HCLEN[4]. 282 for f.nb < 5+5+4 { 283 if err := f.moreBits(); err != nil { 284 return err 285 } 286 } 287 nlit := int(f.b&0x1F) + 257 288 if nlit > maxLit { 289 return CorruptInputError(f.roffset) 290 } 291 f.b >>= 5 292 ndist := int(f.b&0x1F) + 1 293 // maxDist is 32, so ndist is always valid. 294 f.b >>= 5 295 nclen := int(f.b&0xF) + 4 296 // numCodes is 19, so nclen is always valid. 297 f.b >>= 4 298 f.nb -= 5 + 5 + 4 299 300 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. 301 for i := 0; i < nclen; i++ { 302 for f.nb < 3 { 303 if err := f.moreBits(); err != nil { 304 return err 305 } 306 } 307 f.codebits[codeOrder[i]] = int(f.b & 0x7) 308 f.b >>= 3 309 f.nb -= 3 310 } 311 for i := nclen; i < len(codeOrder); i++ { 312 f.codebits[codeOrder[i]] = 0 313 } 314 if !f.h1.init(f.codebits[0:]) { 315 return CorruptInputError(f.roffset) 316 } 317 318 // HLIT + 257 code lengths, HDIST + 1 code lengths, 319 // using the code length Huffman code. 320 for i, n := 0, nlit+ndist; i < n; { 321 x, err := f.huffSym(&f.h1) 322 if err != nil { 323 return err 324 } 325 if x < 16 { 326 // Actual length. 327 f.bits[i] = x 328 i++ 329 continue 330 } 331 // Repeat previous length or zero. 332 var rep int 333 var nb uint 334 var b int 335 switch x { 336 default: 337 return InternalError("unexpected length code") 338 case 16: 339 rep = 3 340 nb = 2 341 if i == 0 { 342 return CorruptInputError(f.roffset) 343 } 344 b = f.bits[i-1] 345 case 17: 346 rep = 3 347 nb = 3 348 b = 0 349 case 18: 350 rep = 11 351 nb = 7 352 b = 0 353 } 354 for f.nb < nb { 355 if err := f.moreBits(); err != nil { 356 return err 357 } 358 } 359 rep += int(f.b & uint32(1<<nb-1)) 360 f.b >>= nb 361 f.nb -= nb 362 if i+rep > n { 363 return CorruptInputError(f.roffset) 364 } 365 for j := 0; j < rep; j++ { 366 f.bits[i] = b 367 i++ 368 } 369 } 370 371 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { 372 return CorruptInputError(f.roffset) 373 } 374 375 return nil 376 } 377 378 // Decode a single Huffman block from f. 379 // hl and hd are the Huffman states for the lit/length values 380 // and the distance values, respectively. If hd == nil, using the 381 // fixed distance encoding associated with fixed Huffman blocks. 382 func (f *decompressor) huffmanBlock() { 383 for { 384 v, err := f.huffSym(f.hl) 385 if err != nil { 386 f.err = err 387 return 388 } 389 var n uint // number of bits extra 390 var length int 391 switch { 392 case v < 256: 393 f.hist[f.hp] = byte(v) 394 f.hp++ 395 if f.hp == len(f.hist) { 396 // After the flush, continue this loop. 397 f.flush((*decompressor).huffmanBlock) 398 return 399 } 400 continue 401 case v == 256: 402 // Done with huffman block; read next block. 403 f.step = (*decompressor).nextBlock 404 return 405 // otherwise, reference to older data 406 case v < 265: 407 length = v - (257 - 3) 408 n = 0 409 case v < 269: 410 length = v*2 - (265*2 - 11) 411 n = 1 412 case v < 273: 413 length = v*4 - (269*4 - 19) 414 n = 2 415 case v < 277: 416 length = v*8 - (273*8 - 35) 417 n = 3 418 case v < 281: 419 length = v*16 - (277*16 - 67) 420 n = 4 421 case v < 285: 422 length = v*32 - (281*32 - 131) 423 n = 5 424 default: 425 length = 258 426 n = 0 427 } 428 if n > 0 { 429 for f.nb < n { 430 if err = f.moreBits(); err != nil { 431 f.err = err 432 return 433 } 434 } 435 length += int(f.b & uint32(1<<n-1)) 436 f.b >>= n 437 f.nb -= n 438 } 439 440 var dist int 441 if f.hd == nil { 442 for f.nb < 5 { 443 if err = f.moreBits(); err != nil { 444 f.err = err 445 return 446 } 447 } 448 dist = int(reverseByte[(f.b&0x1F)<<3]) 449 f.b >>= 5 450 f.nb -= 5 451 } else { 452 if dist, err = f.huffSym(f.hd); err != nil { 453 f.err = err 454 return 455 } 456 } 457 458 switch { 459 case dist < 4: 460 dist++ 461 case dist >= 30: 462 f.err = CorruptInputError(f.roffset) 463 return 464 default: 465 nb := uint(dist-2) >> 1 466 // have 1 bit in bottom of dist, need nb more. 467 extra := (dist & 1) << nb 468 for f.nb < nb { 469 if err = f.moreBits(); err != nil { 470 f.err = err 471 return 472 } 473 } 474 extra |= int(f.b & uint32(1<<nb-1)) 475 f.b >>= nb 476 f.nb -= nb 477 dist = 1<<(nb+1) + 1 + extra 478 } 479 480 // Copy history[-dist:-dist+length] into output. 481 if dist > len(f.hist) { 482 f.err = InternalError("bad history distance") 483 return 484 } 485 486 // No check on length; encoding can be prescient. 487 if !f.hfull && dist > f.hp { 488 f.err = CorruptInputError(f.roffset) 489 return 490 } 491 492 f.copyLen, f.copyDist = length, dist 493 if f.copyHist() { 494 return 495 } 496 } 497 } 498 499 // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. 500 // It reports whether the f.hist buffer is full. 501 func (f *decompressor) copyHist() bool { 502 p := f.hp - f.copyDist 503 if p < 0 { 504 p += len(f.hist) 505 } 506 for f.copyLen > 0 { 507 n := f.copyLen 508 if x := len(f.hist) - f.hp; n > x { 509 n = x 510 } 511 if x := len(f.hist) - p; n > x { 512 n = x 513 } 514 forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n]) 515 p += n 516 f.hp += n 517 f.copyLen -= n 518 if f.hp == len(f.hist) { 519 // After flush continue copying out of history. 520 f.flush((*decompressor).copyHuff) 521 return true 522 } 523 if p == len(f.hist) { 524 p = 0 525 } 526 } 527 return false 528 } 529 530 func (f *decompressor) copyHuff() { 531 if f.copyHist() { 532 return 533 } 534 f.huffmanBlock() 535 } 536 537 // Copy a single uncompressed data block from input to output. 538 func (f *decompressor) dataBlock() { 539 // Uncompressed. 540 // Discard current half-byte. 541 f.nb = 0 542 f.b = 0 543 544 // Length then ones-complement of length. 545 nr, err := io.ReadFull(f.r, f.buf[0:4]) 546 f.roffset += int64(nr) 547 if err != nil { 548 f.err = &ReadError{f.roffset, err} 549 return 550 } 551 n := int(f.buf[0]) | int(f.buf[1])<<8 552 nn := int(f.buf[2]) | int(f.buf[3])<<8 553 if uint16(nn) != uint16(^n) { 554 f.err = CorruptInputError(f.roffset) 555 return 556 } 557 558 if n == 0 { 559 // 0-length block means sync 560 f.flush((*decompressor).nextBlock) 561 return 562 } 563 564 f.copyLen = n 565 f.copyData() 566 } 567 568 // copyData copies f.copyLen bytes from the underlying reader into f.hist. 569 // It pauses for reads when f.hist is full. 570 func (f *decompressor) copyData() { 571 n := f.copyLen 572 for n > 0 { 573 m := len(f.hist) - f.hp 574 if m > n { 575 m = n 576 } 577 m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) 578 f.roffset += int64(m) 579 if err != nil { 580 f.err = &ReadError{f.roffset, err} 581 return 582 } 583 n -= m 584 f.hp += m 585 if f.hp == len(f.hist) { 586 f.copyLen = n 587 f.flush((*decompressor).copyData) 588 return 589 } 590 } 591 f.step = (*decompressor).nextBlock 592 } 593 594 func (f *decompressor) setDict(dict []byte) { 595 if len(dict) > len(f.hist) { 596 // Will only remember the tail. 597 dict = dict[len(dict)-len(f.hist):] 598 } 599 600 f.hp = copy(f.hist[:], dict) 601 if f.hp == len(f.hist) { 602 f.hp = 0 603 f.hfull = true 604 } 605 f.hw = f.hp 606 } 607 608 func (f *decompressor) moreBits() error { 609 c, err := f.r.ReadByte() 610 if err != nil { 611 if err == io.EOF { 612 err = io.ErrUnexpectedEOF 613 } 614 return err 615 } 616 f.roffset++ 617 f.b |= uint32(c) << f.nb 618 f.nb += 8 619 return nil 620 } 621 622 // Read the next Huffman-encoded symbol from f according to h. 623 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { 624 n := uint(h.min) 625 for { 626 for f.nb < n { 627 if err := f.moreBits(); err != nil { 628 return 0, err 629 } 630 } 631 chunk := h.chunks[f.b&(huffmanNumChunks-1)] 632 n = uint(chunk & huffmanCountMask) 633 if n > huffmanChunkBits { 634 chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] 635 n = uint(chunk & huffmanCountMask) 636 } 637 if n <= f.nb { 638 f.b >>= n 639 f.nb -= n 640 return int(chunk >> huffmanValueShift), nil 641 } 642 } 643 } 644 645 // Flush any buffered output to the underlying writer. 646 func (f *decompressor) flush(step func(*decompressor)) { 647 f.toRead = f.hist[f.hw:f.hp] 648 f.woffset += int64(f.hp - f.hw) 649 f.hw = f.hp 650 if f.hp == len(f.hist) { 651 f.hp = 0 652 f.hw = 0 653 f.hfull = true 654 } 655 f.step = step 656 } 657 658 func makeReader(r io.Reader) Reader { 659 if rr, ok := r.(Reader); ok { 660 return rr 661 } 662 return bufio.NewReader(r) 663 } 664 665 // NewReader returns a new ReadCloser that can be used 666 // to read the uncompressed version of r. It is the caller's 667 // responsibility to call Close on the ReadCloser when 668 // finished reading. 669 func NewReader(r io.Reader) io.ReadCloser { 670 var f decompressor 671 f.bits = new([maxLit + maxDist]int) 672 f.codebits = new([numCodes]int) 673 f.r = makeReader(r) 674 f.hist = new([maxHist]byte) 675 f.step = (*decompressor).nextBlock 676 return &f 677 } 678 679 // NewReaderDict is like NewReader but initializes the reader 680 // with a preset dictionary. The returned Reader behaves as if 681 // the uncompressed data stream started with the given dictionary, 682 // which has already been read. NewReaderDict is typically used 683 // to read data compressed by NewWriterDict. 684 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { 685 var f decompressor 686 f.r = makeReader(r) 687 f.hist = new([maxHist]byte) 688 f.bits = new([maxLit + maxDist]int) 689 f.codebits = new([numCodes]int) 690 f.step = (*decompressor).nextBlock 691 f.setDict(dict) 692 return &f 693 }