// Source: github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/compress/flate/inflate.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate

import (
	"bufio"
	"io"
	"strconv"
)

const (
	// maxCodeLen bounds the Huffman code lengths; it also sizes the
	// per-length tables used while building a huffmanDecoder.
	maxCodeLen = 16    // max length of Huffman code
	maxHist    = 32768 // max history required
	// The next three numbers come from the RFC, section 3.2.7.
	maxLit   = 286
	maxDist  = 32
	numCodes = 19 // number of codes in Huffman meta-code
)

// A CorruptInputError reports the presence of corrupt input at a given offset.
// Its value is the input byte offset before which the corruption was detected.
type CorruptInputError int64

// Error formats the error with the offset printed in decimal.
func (e CorruptInputError) Error() string {
	return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
}

// An InternalError reports an error in the flate code itself.
type InternalError string

// Error returns the message prefixed with "flate: internal error: ".
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }

// A ReadError reports an error encountered while reading input.
type ReadError struct {
	Offset int64 // byte offset where error occurred
	Err    error // error returned by underlying Read
}

// Error formats the offset and the wrapped error's message.
// It calls e.Err.Error(), so Err must be non-nil.
func (e *ReadError) Error() string {
	return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}

// A WriteError reports an error encountered while writing output.
48 type WriteError struct { 49 Offset int64 // byte offset where error occurred 50 Err error // error returned by underlying Write 51 } 52 53 func (e *WriteError) Error() string { 54 return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() 55 } 56 57 // Note that much of the implemenation of huffmanDecoder is also copied 58 // into gen.go (in package main) for the purpose of precomputing the 59 // fixed huffman tables so they can be included statically. 60 61 // The data structure for decoding Huffman tables is based on that of 62 // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), 63 // For codes smaller than the table width, there are multiple entries 64 // (each combination of trailing bits has the same value). For codes 65 // larger than the table width, the table contains a link to an overflow 66 // table. The width of each entry in the link table is the maximum code 67 // size minus the chunk width. 68 69 // Note that you can do a lookup in the table even without all bits 70 // filled. Since the extra bits are zero, and the DEFLATE Huffman codes 71 // have the property that shorter codes come before longer ones, the 72 // bit length estimate in the result is a lower bound on the actual 73 // number of bits. 74 75 // chunk & 15 is number of bits 76 // chunk >> 4 is value, including table link 77 78 const ( 79 huffmanChunkBits = 9 80 huffmanNumChunks = 1 << huffmanChunkBits 81 huffmanCountMask = 15 82 huffmanValueShift = 4 83 ) 84 85 type huffmanDecoder struct { 86 min int // the minimum code length 87 chunks [huffmanNumChunks]uint32 // chunks as described above 88 links [][]uint32 // overflow links 89 linkMask uint32 // mask the width of the link table 90 } 91 92 // Initialize Huffman decoding tables from array of code lengths. 93 func (h *huffmanDecoder) init(bits []int) bool { 94 // Count number of codes of each length, 95 // compute min and max length. 
96 var count [maxCodeLen]int 97 var min, max int 98 for _, n := range bits { 99 if n == 0 { 100 continue 101 } 102 if min == 0 || n < min { 103 min = n 104 } 105 if n > max { 106 max = n 107 } 108 count[n]++ 109 } 110 if max == 0 { 111 return false 112 } 113 114 h.min = min 115 var linkBits uint 116 var numLinks int 117 if max > huffmanChunkBits { 118 linkBits = uint(max) - huffmanChunkBits 119 numLinks = 1 << linkBits 120 h.linkMask = uint32(numLinks - 1) 121 } 122 code := 0 123 var nextcode [maxCodeLen]int 124 for i := min; i <= max; i++ { 125 if i == huffmanChunkBits+1 { 126 // create link tables 127 link := code >> 1 128 if huffmanNumChunks < link { 129 return false 130 } 131 h.links = make([][]uint32, huffmanNumChunks-link) 132 for j := uint(link); j < huffmanNumChunks; j++ { 133 reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 134 reverse >>= uint(16 - huffmanChunkBits) 135 off := j - uint(link) 136 h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i)) 137 h.links[off] = make([]uint32, 1<<linkBits) 138 } 139 } 140 n := count[i] 141 nextcode[i] = code 142 code += n 143 code <<= 1 144 } 145 146 for i, n := range bits { 147 if n == 0 { 148 continue 149 } 150 code := nextcode[n] 151 nextcode[n]++ 152 chunk := uint32(i<<huffmanValueShift | n) 153 reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8 154 reverse >>= uint(16 - n) 155 if n <= huffmanChunkBits { 156 for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { 157 h.chunks[off] = chunk 158 } 159 } else { 160 value := h.chunks[reverse&(huffmanNumChunks-1)] >> huffmanValueShift 161 if value >= uint32(len(h.links)) { 162 return false 163 } 164 linktab := h.links[value] 165 reverse >>= huffmanChunkBits 166 for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { 167 linktab[off] = chunk 168 } 169 } 170 } 171 return true 172 } 173 174 // The actual read interface needed by NewReader. 
175 // If the passed in io.Reader does not also have ReadByte, 176 // the NewReader will introduce its own buffering. 177 type Reader interface { 178 io.Reader 179 ReadByte() (c byte, err error) 180 } 181 182 // Decompress state. 183 type decompressor struct { 184 // Input source. 185 r Reader 186 roffset int64 187 woffset int64 188 189 // Input bits, in top of b. 190 b uint32 191 nb uint 192 193 // Huffman decoders for literal/length, distance. 194 h1, h2 huffmanDecoder 195 196 // Length arrays used to define Huffman codes. 197 bits *[maxLit + maxDist]int 198 codebits *[numCodes]int 199 200 // Output history, buffer. 201 hist *[maxHist]byte 202 hp int // current output position in buffer 203 hw int // have written hist[0:hw] already 204 hfull bool // buffer has filled at least once 205 206 // Temporary buffer (avoids repeated allocation). 207 buf [4]byte 208 209 // Next step in the decompression, 210 // and decompression state. 211 step func(*decompressor) 212 final bool 213 err error 214 toRead []byte 215 hl, hd *huffmanDecoder 216 copyLen int 217 copyDist int 218 } 219 220 func (f *decompressor) nextBlock() { 221 if f.final { 222 if f.hw != f.hp { 223 f.flush((*decompressor).nextBlock) 224 return 225 } 226 f.err = io.EOF 227 return 228 } 229 for f.nb < 1+2 { 230 if f.err = f.moreBits(); f.err != nil { 231 return 232 } 233 } 234 f.final = f.b&1 == 1 235 f.b >>= 1 236 typ := f.b & 3 237 f.b >>= 2 238 f.nb -= 1 + 2 239 switch typ { 240 case 0: 241 f.dataBlock() 242 case 1: 243 // compressed, fixed Huffman tables 244 f.hl = &fixedHuffmanDecoder 245 f.hd = nil 246 f.huffmanBlock() 247 case 2: 248 // compressed, dynamic Huffman tables 249 if f.err = f.readHuffman(); f.err != nil { 250 break 251 } 252 f.hl = &f.h1 253 f.hd = &f.h2 254 f.huffmanBlock() 255 default: 256 // 3 is reserved. 
257 f.err = CorruptInputError(f.roffset) 258 } 259 } 260 261 func (f *decompressor) Read(b []byte) (int, error) { 262 for { 263 if len(f.toRead) > 0 { 264 n := copy(b, f.toRead) 265 f.toRead = f.toRead[n:] 266 return n, nil 267 } 268 if f.err != nil { 269 return 0, f.err 270 } 271 f.step(f) 272 } 273 } 274 275 func (f *decompressor) Close() error { 276 if f.err == io.EOF { 277 return nil 278 } 279 return f.err 280 } 281 282 // RFC 1951 section 3.2.7. 283 // Compression with dynamic Huffman codes 284 285 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 286 287 func (f *decompressor) readHuffman() error { 288 // HLIT[5], HDIST[5], HCLEN[4]. 289 for f.nb < 5+5+4 { 290 if err := f.moreBits(); err != nil { 291 return err 292 } 293 } 294 nlit := int(f.b&0x1F) + 257 295 if nlit > maxLit { 296 return CorruptInputError(f.roffset) 297 } 298 f.b >>= 5 299 ndist := int(f.b&0x1F) + 1 300 // maxDist is 32, so ndist is always valid. 301 f.b >>= 5 302 nclen := int(f.b&0xF) + 4 303 // numCodes is 19, so nclen is always valid. 304 f.b >>= 4 305 f.nb -= 5 + 5 + 4 306 307 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. 308 for i := 0; i < nclen; i++ { 309 for f.nb < 3 { 310 if err := f.moreBits(); err != nil { 311 return err 312 } 313 } 314 f.codebits[codeOrder[i]] = int(f.b & 0x7) 315 f.b >>= 3 316 f.nb -= 3 317 } 318 for i := nclen; i < len(codeOrder); i++ { 319 f.codebits[codeOrder[i]] = 0 320 } 321 if !f.h1.init(f.codebits[0:]) { 322 return CorruptInputError(f.roffset) 323 } 324 325 // HLIT + 257 code lengths, HDIST + 1 code lengths, 326 // using the code length Huffman code. 327 for i, n := 0, nlit+ndist; i < n; { 328 x, err := f.huffSym(&f.h1) 329 if err != nil { 330 return err 331 } 332 if x < 16 { 333 // Actual length. 334 f.bits[i] = x 335 i++ 336 continue 337 } 338 // Repeat previous length or zero. 
339 var rep int 340 var nb uint 341 var b int 342 switch x { 343 default: 344 return InternalError("unexpected length code") 345 case 16: 346 rep = 3 347 nb = 2 348 if i == 0 { 349 return CorruptInputError(f.roffset) 350 } 351 b = f.bits[i-1] 352 case 17: 353 rep = 3 354 nb = 3 355 b = 0 356 case 18: 357 rep = 11 358 nb = 7 359 b = 0 360 } 361 for f.nb < nb { 362 if err := f.moreBits(); err != nil { 363 return err 364 } 365 } 366 rep += int(f.b & uint32(1<<nb-1)) 367 f.b >>= nb 368 f.nb -= nb 369 if i+rep > n { 370 return CorruptInputError(f.roffset) 371 } 372 for j := 0; j < rep; j++ { 373 f.bits[i] = b 374 i++ 375 } 376 } 377 378 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { 379 return CorruptInputError(f.roffset) 380 } 381 382 return nil 383 } 384 385 // Decode a single Huffman block from f. 386 // hl and hd are the Huffman states for the lit/length values 387 // and the distance values, respectively. If hd == nil, using the 388 // fixed distance encoding associated with fixed Huffman blocks. 389 func (f *decompressor) huffmanBlock() { 390 for { 391 v, err := f.huffSym(f.hl) 392 if err != nil { 393 f.err = err 394 return 395 } 396 var n uint // number of bits extra 397 var length int 398 switch { 399 case v < 256: 400 f.hist[f.hp] = byte(v) 401 f.hp++ 402 if f.hp == len(f.hist) { 403 // After the flush, continue this loop. 404 f.flush((*decompressor).huffmanBlock) 405 return 406 } 407 continue 408 case v == 256: 409 // Done with huffman block; read next block. 
410 f.step = (*decompressor).nextBlock 411 return 412 // otherwise, reference to older data 413 case v < 265: 414 length = v - (257 - 3) 415 n = 0 416 case v < 269: 417 length = v*2 - (265*2 - 11) 418 n = 1 419 case v < 273: 420 length = v*4 - (269*4 - 19) 421 n = 2 422 case v < 277: 423 length = v*8 - (273*8 - 35) 424 n = 3 425 case v < 281: 426 length = v*16 - (277*16 - 67) 427 n = 4 428 case v < 285: 429 length = v*32 - (281*32 - 131) 430 n = 5 431 default: 432 length = 258 433 n = 0 434 } 435 if n > 0 { 436 for f.nb < n { 437 if err = f.moreBits(); err != nil { 438 f.err = err 439 return 440 } 441 } 442 length += int(f.b & uint32(1<<n-1)) 443 f.b >>= n 444 f.nb -= n 445 } 446 447 var dist int 448 if f.hd == nil { 449 for f.nb < 5 { 450 if err = f.moreBits(); err != nil { 451 f.err = err 452 return 453 } 454 } 455 dist = int(reverseByte[(f.b&0x1F)<<3]) 456 f.b >>= 5 457 f.nb -= 5 458 } else { 459 if dist, err = f.huffSym(f.hd); err != nil { 460 f.err = err 461 return 462 } 463 } 464 465 switch { 466 case dist < 4: 467 dist++ 468 case dist >= 30: 469 f.err = CorruptInputError(f.roffset) 470 return 471 default: 472 nb := uint(dist-2) >> 1 473 // have 1 bit in bottom of dist, need nb more. 474 extra := (dist & 1) << nb 475 for f.nb < nb { 476 if err = f.moreBits(); err != nil { 477 f.err = err 478 return 479 } 480 } 481 extra |= int(f.b & uint32(1<<nb-1)) 482 f.b >>= nb 483 f.nb -= nb 484 dist = 1<<(nb+1) + 1 + extra 485 } 486 487 // Copy history[-dist:-dist+length] into output. 488 if dist > len(f.hist) { 489 f.err = InternalError("bad history distance") 490 return 491 } 492 493 // No check on length; encoding can be prescient. 494 if !f.hfull && dist > f.hp { 495 f.err = CorruptInputError(f.roffset) 496 return 497 } 498 499 f.copyLen, f.copyDist = length, dist 500 if f.copyHist() { 501 return 502 } 503 } 504 } 505 506 // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. 507 // It reports whether the f.hist buffer is full. 
508 func (f *decompressor) copyHist() bool { 509 p := f.hp - f.copyDist 510 if p < 0 { 511 p += len(f.hist) 512 } 513 for f.copyLen > 0 { 514 n := f.copyLen 515 if x := len(f.hist) - f.hp; n > x { 516 n = x 517 } 518 if x := len(f.hist) - p; n > x { 519 n = x 520 } 521 forwardCopy(f.hist[:], f.hp, p, n) 522 p += n 523 f.hp += n 524 f.copyLen -= n 525 if f.hp == len(f.hist) { 526 // After flush continue copying out of history. 527 f.flush((*decompressor).copyHuff) 528 return true 529 } 530 if p == len(f.hist) { 531 p = 0 532 } 533 } 534 return false 535 } 536 537 func (f *decompressor) copyHuff() { 538 if f.copyHist() { 539 return 540 } 541 f.huffmanBlock() 542 } 543 544 // Copy a single uncompressed data block from input to output. 545 func (f *decompressor) dataBlock() { 546 // Uncompressed. 547 // Discard current half-byte. 548 f.nb = 0 549 f.b = 0 550 551 // Length then ones-complement of length. 552 nr, err := io.ReadFull(f.r, f.buf[0:4]) 553 f.roffset += int64(nr) 554 if err != nil { 555 f.err = &ReadError{f.roffset, err} 556 return 557 } 558 n := int(f.buf[0]) | int(f.buf[1])<<8 559 nn := int(f.buf[2]) | int(f.buf[3])<<8 560 if uint16(nn) != uint16(^n) { 561 f.err = CorruptInputError(f.roffset) 562 return 563 } 564 565 if n == 0 { 566 // 0-length block means sync 567 f.flush((*decompressor).nextBlock) 568 return 569 } 570 571 f.copyLen = n 572 f.copyData() 573 } 574 575 // copyData copies f.copyLen bytes from the underlying reader into f.hist. 576 // It pauses for reads when f.hist is full. 
577 func (f *decompressor) copyData() { 578 n := f.copyLen 579 for n > 0 { 580 m := len(f.hist) - f.hp 581 if m > n { 582 m = n 583 } 584 m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) 585 f.roffset += int64(m) 586 if err != nil { 587 f.err = &ReadError{f.roffset, err} 588 return 589 } 590 n -= m 591 f.hp += m 592 if f.hp == len(f.hist) { 593 f.copyLen = n 594 f.flush((*decompressor).copyData) 595 return 596 } 597 } 598 f.step = (*decompressor).nextBlock 599 } 600 601 func (f *decompressor) setDict(dict []byte) { 602 if len(dict) > len(f.hist) { 603 // Will only remember the tail. 604 dict = dict[len(dict)-len(f.hist):] 605 } 606 607 f.hp = copy(f.hist[:], dict) 608 if f.hp == len(f.hist) { 609 f.hp = 0 610 f.hfull = true 611 } 612 f.hw = f.hp 613 } 614 615 func (f *decompressor) moreBits() error { 616 c, err := f.r.ReadByte() 617 if err != nil { 618 if err == io.EOF { 619 err = io.ErrUnexpectedEOF 620 } 621 return err 622 } 623 f.roffset++ 624 f.b |= uint32(c) << f.nb 625 f.nb += 8 626 return nil 627 } 628 629 // Read the next Huffman-encoded symbol from f according to h. 630 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { 631 n := uint(h.min) 632 for { 633 for f.nb < n { 634 if err := f.moreBits(); err != nil { 635 return 0, err 636 } 637 } 638 chunk := h.chunks[f.b&(huffmanNumChunks-1)] 639 n = uint(chunk & huffmanCountMask) 640 if n > huffmanChunkBits { 641 chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] 642 n = uint(chunk & huffmanCountMask) 643 } 644 if n <= f.nb { 645 f.b >>= n 646 f.nb -= n 647 return int(chunk >> huffmanValueShift), nil 648 } 649 } 650 } 651 652 // Flush any buffered output to the underlying writer. 
653 func (f *decompressor) flush(step func(*decompressor)) { 654 f.toRead = f.hist[f.hw:f.hp] 655 f.woffset += int64(f.hp - f.hw) 656 f.hw = f.hp 657 if f.hp == len(f.hist) { 658 f.hp = 0 659 f.hw = 0 660 f.hfull = true 661 } 662 f.step = step 663 } 664 665 func makeReader(r io.Reader) Reader { 666 if rr, ok := r.(Reader); ok { 667 return rr 668 } 669 return bufio.NewReader(r) 670 } 671 672 // NewReader returns a new ReadCloser that can be used 673 // to read the uncompressed version of r. It is the caller's 674 // responsibility to call Close on the ReadCloser when 675 // finished reading. 676 func NewReader(r io.Reader) io.ReadCloser { 677 var f decompressor 678 f.bits = new([maxLit + maxDist]int) 679 f.codebits = new([numCodes]int) 680 f.r = makeReader(r) 681 f.hist = new([maxHist]byte) 682 f.step = (*decompressor).nextBlock 683 return &f 684 } 685 686 // NewReaderDict is like NewReader but initializes the reader 687 // with a preset dictionary. The returned Reader behaves as if 688 // the uncompressed data stream started with the given dictionary, 689 // which has already been read. NewReaderDict is typically used 690 // to read data compressed by NewWriterDict. 691 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { 692 var f decompressor 693 f.r = makeReader(r) 694 f.hist = new([maxHist]byte) 695 f.bits = new([maxLit + maxDist]int) 696 f.codebits = new([numCodes]int) 697 f.step = (*decompressor).nextBlock 698 f.setDict(dict) 699 return &f 700 }