github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/compress/flate/inflate.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package flate implements the DEFLATE compressed data format, described in 6 // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file 7 // formats. 8 package flate 9 10 import ( 11 "bufio" 12 "io" 13 "strconv" 14 ) 15 16 const ( 17 maxCodeLen = 16 // max length of Huffman code 18 maxHist = 32768 // max history required 19 // The next three numbers come from the RFC, section 3.2.7. 20 maxLit = 286 21 maxDist = 32 22 numCodes = 19 // number of codes in Huffman meta-code 23 ) 24 25 // A CorruptInputError reports the presence of corrupt input at a given offset. 26 type CorruptInputError int64 27 28 func (e CorruptInputError) Error() string { 29 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) 30 } 31 32 // An InternalError reports an error in the flate code itself. 33 type InternalError string 34 35 func (e InternalError) Error() string { return "flate: internal error: " + string(e) } 36 37 // A ReadError reports an error encountered while reading input. 38 type ReadError struct { 39 Offset int64 // byte offset where error occurred 40 Err error // error returned by underlying Read 41 } 42 43 func (e *ReadError) Error() string { 44 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() 45 } 46 47 // A WriteError reports an error encountered while writing output. 
type WriteError struct {
	Offset int64 // byte offset where error occurred
	Err    error // error returned by underlying Write
}

// Error combines the decimal offset with the message of the underlying
// error. Err must be non-nil.
func (we *WriteError) Error() string {
	where := strconv.FormatInt(we.Offset, 10)
	return "flate: write error at offset " + where + ": " + we.Err.Error()
}

// Note that much of the implementation of huffmanDecoder is also copied
// into gen.go (in package main) for the purpose of precomputing the
// fixed huffman tables so they can be included statically.

// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.

// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.

// Layout of one table entry ("chunk"):
//
//	chunk & 15 is number of bits
//	chunk >> 4 is value, including table link

const (
	huffmanChunkBits  = 9                     // width, in bits, of the primary lookup table index
	huffmanNumChunks  = 1 << huffmanChunkBits // number of entries in the primary table
	huffmanCountMask  = 15                    // extracts the bit count from a chunk
	huffmanValueShift = 4                     // shifts a chunk down to its value
)

// huffmanDecoder holds the lookup tables for one Huffman code.
type huffmanDecoder struct {
	min      int                      // the minimum code length
	chunks   [huffmanNumChunks]uint32 // chunks as described above
	links    [][]uint32               // overflow links
	linkMask uint32                   // mask the width of the link table
}

// Initialize Huffman decoding tables from array of code lengths.
93 func (h *huffmanDecoder) init(bits []int) bool { 94 if h.min != 0 { 95 *h = huffmanDecoder{} 96 } 97 98 // Count number of codes of each length, 99 // compute min and max length. 100 var count [maxCodeLen]int 101 var min, max int 102 for _, n := range bits { 103 if n == 0 { 104 continue 105 } 106 if min == 0 || n < min { 107 min = n 108 } 109 if n > max { 110 max = n 111 } 112 count[n]++ 113 } 114 if max == 0 { 115 return false 116 } 117 118 h.min = min 119 var linkBits uint 120 var numLinks int 121 if max > huffmanChunkBits { 122 linkBits = uint(max) - huffmanChunkBits 123 numLinks = 1 << linkBits 124 h.linkMask = uint32(numLinks - 1) 125 } 126 code := 0 127 var nextcode [maxCodeLen]int 128 for i := min; i <= max; i++ { 129 if i == huffmanChunkBits+1 { 130 // create link tables 131 link := code >> 1 132 if huffmanNumChunks < link { 133 return false 134 } 135 h.links = make([][]uint32, huffmanNumChunks-link) 136 for j := uint(link); j < huffmanNumChunks; j++ { 137 reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 138 reverse >>= uint(16 - huffmanChunkBits) 139 off := j - uint(link) 140 h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i)) 141 h.links[off] = make([]uint32, 1<<linkBits) 142 } 143 } 144 n := count[i] 145 nextcode[i] = code 146 code += n 147 code <<= 1 148 } 149 150 for i, n := range bits { 151 if n == 0 { 152 continue 153 } 154 code := nextcode[n] 155 nextcode[n]++ 156 chunk := uint32(i<<huffmanValueShift | n) 157 reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8 158 reverse >>= uint(16 - n) 159 if n <= huffmanChunkBits { 160 for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { 161 h.chunks[off] = chunk 162 } 163 } else { 164 value := h.chunks[reverse&(huffmanNumChunks-1)] >> huffmanValueShift 165 if value >= uint32(len(h.links)) { 166 return false 167 } 168 linktab := h.links[value] 169 reverse >>= huffmanChunkBits 170 for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { 
171 linktab[off] = chunk 172 } 173 } 174 } 175 return true 176 } 177 178 // The actual read interface needed by NewReader. 179 // If the passed in io.Reader does not also have ReadByte, 180 // the NewReader will introduce its own buffering. 181 type Reader interface { 182 io.Reader 183 ReadByte() (c byte, err error) 184 } 185 186 // Decompress state. 187 type decompressor struct { 188 // Input source. 189 r Reader 190 roffset int64 191 woffset int64 192 193 // Input bits, in top of b. 194 b uint32 195 nb uint 196 197 // Huffman decoders for literal/length, distance. 198 h1, h2 huffmanDecoder 199 200 // Length arrays used to define Huffman codes. 201 bits *[maxLit + maxDist]int 202 codebits *[numCodes]int 203 204 // Output history, buffer. 205 hist *[maxHist]byte 206 hp int // current output position in buffer 207 hw int // have written hist[0:hw] already 208 hfull bool // buffer has filled at least once 209 210 // Temporary buffer (avoids repeated allocation). 211 buf [4]byte 212 213 // Next step in the decompression, 214 // and decompression state. 215 step func(*decompressor) 216 final bool 217 err error 218 toRead []byte 219 hl, hd *huffmanDecoder 220 copyLen int 221 copyDist int 222 } 223 224 func (f *decompressor) nextBlock() { 225 if f.final { 226 if f.hw != f.hp { 227 f.flush((*decompressor).nextBlock) 228 return 229 } 230 f.err = io.EOF 231 return 232 } 233 for f.nb < 1+2 { 234 if f.err = f.moreBits(); f.err != nil { 235 return 236 } 237 } 238 f.final = f.b&1 == 1 239 f.b >>= 1 240 typ := f.b & 3 241 f.b >>= 2 242 f.nb -= 1 + 2 243 switch typ { 244 case 0: 245 f.dataBlock() 246 case 1: 247 // compressed, fixed Huffman tables 248 f.hl = &fixedHuffmanDecoder 249 f.hd = nil 250 f.huffmanBlock() 251 case 2: 252 // compressed, dynamic Huffman tables 253 if f.err = f.readHuffman(); f.err != nil { 254 break 255 } 256 f.hl = &f.h1 257 f.hd = &f.h2 258 f.huffmanBlock() 259 default: 260 // 3 is reserved. 
261 f.err = CorruptInputError(f.roffset) 262 } 263 } 264 265 func (f *decompressor) Read(b []byte) (int, error) { 266 for { 267 if len(f.toRead) > 0 { 268 n := copy(b, f.toRead) 269 f.toRead = f.toRead[n:] 270 return n, nil 271 } 272 if f.err != nil { 273 return 0, f.err 274 } 275 f.step(f) 276 } 277 } 278 279 func (f *decompressor) Close() error { 280 if f.err == io.EOF { 281 return nil 282 } 283 return f.err 284 } 285 286 // RFC 1951 section 3.2.7. 287 // Compression with dynamic Huffman codes 288 289 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 290 291 func (f *decompressor) readHuffman() error { 292 // HLIT[5], HDIST[5], HCLEN[4]. 293 for f.nb < 5+5+4 { 294 if err := f.moreBits(); err != nil { 295 return err 296 } 297 } 298 nlit := int(f.b&0x1F) + 257 299 if nlit > maxLit { 300 return CorruptInputError(f.roffset) 301 } 302 f.b >>= 5 303 ndist := int(f.b&0x1F) + 1 304 // maxDist is 32, so ndist is always valid. 305 f.b >>= 5 306 nclen := int(f.b&0xF) + 4 307 // numCodes is 19, so nclen is always valid. 308 f.b >>= 4 309 f.nb -= 5 + 5 + 4 310 311 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. 312 for i := 0; i < nclen; i++ { 313 for f.nb < 3 { 314 if err := f.moreBits(); err != nil { 315 return err 316 } 317 } 318 f.codebits[codeOrder[i]] = int(f.b & 0x7) 319 f.b >>= 3 320 f.nb -= 3 321 } 322 for i := nclen; i < len(codeOrder); i++ { 323 f.codebits[codeOrder[i]] = 0 324 } 325 if !f.h1.init(f.codebits[0:]) { 326 return CorruptInputError(f.roffset) 327 } 328 329 // HLIT + 257 code lengths, HDIST + 1 code lengths, 330 // using the code length Huffman code. 331 for i, n := 0, nlit+ndist; i < n; { 332 x, err := f.huffSym(&f.h1) 333 if err != nil { 334 return err 335 } 336 if x < 16 { 337 // Actual length. 338 f.bits[i] = x 339 i++ 340 continue 341 } 342 // Repeat previous length or zero. 
343 var rep int 344 var nb uint 345 var b int 346 switch x { 347 default: 348 return InternalError("unexpected length code") 349 case 16: 350 rep = 3 351 nb = 2 352 if i == 0 { 353 return CorruptInputError(f.roffset) 354 } 355 b = f.bits[i-1] 356 case 17: 357 rep = 3 358 nb = 3 359 b = 0 360 case 18: 361 rep = 11 362 nb = 7 363 b = 0 364 } 365 for f.nb < nb { 366 if err := f.moreBits(); err != nil { 367 return err 368 } 369 } 370 rep += int(f.b & uint32(1<<nb-1)) 371 f.b >>= nb 372 f.nb -= nb 373 if i+rep > n { 374 return CorruptInputError(f.roffset) 375 } 376 for j := 0; j < rep; j++ { 377 f.bits[i] = b 378 i++ 379 } 380 } 381 382 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { 383 return CorruptInputError(f.roffset) 384 } 385 386 return nil 387 } 388 389 // Decode a single Huffman block from f. 390 // hl and hd are the Huffman states for the lit/length values 391 // and the distance values, respectively. If hd == nil, using the 392 // fixed distance encoding associated with fixed Huffman blocks. 393 func (f *decompressor) huffmanBlock() { 394 for { 395 v, err := f.huffSym(f.hl) 396 if err != nil { 397 f.err = err 398 return 399 } 400 var n uint // number of bits extra 401 var length int 402 switch { 403 case v < 256: 404 f.hist[f.hp] = byte(v) 405 f.hp++ 406 if f.hp == len(f.hist) { 407 // After the flush, continue this loop. 408 f.flush((*decompressor).huffmanBlock) 409 return 410 } 411 continue 412 case v == 256: 413 // Done with huffman block; read next block. 
414 f.step = (*decompressor).nextBlock 415 return 416 // otherwise, reference to older data 417 case v < 265: 418 length = v - (257 - 3) 419 n = 0 420 case v < 269: 421 length = v*2 - (265*2 - 11) 422 n = 1 423 case v < 273: 424 length = v*4 - (269*4 - 19) 425 n = 2 426 case v < 277: 427 length = v*8 - (273*8 - 35) 428 n = 3 429 case v < 281: 430 length = v*16 - (277*16 - 67) 431 n = 4 432 case v < 285: 433 length = v*32 - (281*32 - 131) 434 n = 5 435 default: 436 length = 258 437 n = 0 438 } 439 if n > 0 { 440 for f.nb < n { 441 if err = f.moreBits(); err != nil { 442 f.err = err 443 return 444 } 445 } 446 length += int(f.b & uint32(1<<n-1)) 447 f.b >>= n 448 f.nb -= n 449 } 450 451 var dist int 452 if f.hd == nil { 453 for f.nb < 5 { 454 if err = f.moreBits(); err != nil { 455 f.err = err 456 return 457 } 458 } 459 dist = int(reverseByte[(f.b&0x1F)<<3]) 460 f.b >>= 5 461 f.nb -= 5 462 } else { 463 if dist, err = f.huffSym(f.hd); err != nil { 464 f.err = err 465 return 466 } 467 } 468 469 switch { 470 case dist < 4: 471 dist++ 472 case dist >= 30: 473 f.err = CorruptInputError(f.roffset) 474 return 475 default: 476 nb := uint(dist-2) >> 1 477 // have 1 bit in bottom of dist, need nb more. 478 extra := (dist & 1) << nb 479 for f.nb < nb { 480 if err = f.moreBits(); err != nil { 481 f.err = err 482 return 483 } 484 } 485 extra |= int(f.b & uint32(1<<nb-1)) 486 f.b >>= nb 487 f.nb -= nb 488 dist = 1<<(nb+1) + 1 + extra 489 } 490 491 // Copy history[-dist:-dist+length] into output. 492 if dist > len(f.hist) { 493 f.err = InternalError("bad history distance") 494 return 495 } 496 497 // No check on length; encoding can be prescient. 498 if !f.hfull && dist > f.hp { 499 f.err = CorruptInputError(f.roffset) 500 return 501 } 502 503 f.copyLen, f.copyDist = length, dist 504 if f.copyHist() { 505 return 506 } 507 } 508 } 509 510 // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. 511 // It reports whether the f.hist buffer is full. 
512 func (f *decompressor) copyHist() bool { 513 p := f.hp - f.copyDist 514 if p < 0 { 515 p += len(f.hist) 516 } 517 for f.copyLen > 0 { 518 n := f.copyLen 519 if x := len(f.hist) - f.hp; n > x { 520 n = x 521 } 522 if x := len(f.hist) - p; n > x { 523 n = x 524 } 525 forwardCopy(f.hist[:], f.hp, p, n) 526 p += n 527 f.hp += n 528 f.copyLen -= n 529 if f.hp == len(f.hist) { 530 // After flush continue copying out of history. 531 f.flush((*decompressor).copyHuff) 532 return true 533 } 534 if p == len(f.hist) { 535 p = 0 536 } 537 } 538 return false 539 } 540 541 func (f *decompressor) copyHuff() { 542 if f.copyHist() { 543 return 544 } 545 f.huffmanBlock() 546 } 547 548 // Copy a single uncompressed data block from input to output. 549 func (f *decompressor) dataBlock() { 550 // Uncompressed. 551 // Discard current half-byte. 552 f.nb = 0 553 f.b = 0 554 555 // Length then ones-complement of length. 556 nr, err := io.ReadFull(f.r, f.buf[0:4]) 557 f.roffset += int64(nr) 558 if err != nil { 559 f.err = &ReadError{f.roffset, err} 560 return 561 } 562 n := int(f.buf[0]) | int(f.buf[1])<<8 563 nn := int(f.buf[2]) | int(f.buf[3])<<8 564 if uint16(nn) != uint16(^n) { 565 f.err = CorruptInputError(f.roffset) 566 return 567 } 568 569 if n == 0 { 570 // 0-length block means sync 571 f.flush((*decompressor).nextBlock) 572 return 573 } 574 575 f.copyLen = n 576 f.copyData() 577 } 578 579 // copyData copies f.copyLen bytes from the underlying reader into f.hist. 580 // It pauses for reads when f.hist is full. 
581 func (f *decompressor) copyData() { 582 n := f.copyLen 583 for n > 0 { 584 m := len(f.hist) - f.hp 585 if m > n { 586 m = n 587 } 588 m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) 589 f.roffset += int64(m) 590 if err != nil { 591 f.err = &ReadError{f.roffset, err} 592 return 593 } 594 n -= m 595 f.hp += m 596 if f.hp == len(f.hist) { 597 f.copyLen = n 598 f.flush((*decompressor).copyData) 599 return 600 } 601 } 602 f.step = (*decompressor).nextBlock 603 } 604 605 func (f *decompressor) setDict(dict []byte) { 606 if len(dict) > len(f.hist) { 607 // Will only remember the tail. 608 dict = dict[len(dict)-len(f.hist):] 609 } 610 611 f.hp = copy(f.hist[:], dict) 612 if f.hp == len(f.hist) { 613 f.hp = 0 614 f.hfull = true 615 } 616 f.hw = f.hp 617 } 618 619 func (f *decompressor) moreBits() error { 620 c, err := f.r.ReadByte() 621 if err != nil { 622 if err == io.EOF { 623 err = io.ErrUnexpectedEOF 624 } 625 return err 626 } 627 f.roffset++ 628 f.b |= uint32(c) << f.nb 629 f.nb += 8 630 return nil 631 } 632 633 // Read the next Huffman-encoded symbol from f according to h. 634 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { 635 n := uint(h.min) 636 for { 637 for f.nb < n { 638 if err := f.moreBits(); err != nil { 639 return 0, err 640 } 641 } 642 chunk := h.chunks[f.b&(huffmanNumChunks-1)] 643 n = uint(chunk & huffmanCountMask) 644 if n > huffmanChunkBits { 645 chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] 646 n = uint(chunk & huffmanCountMask) 647 if n == 0 { 648 f.err = CorruptInputError(f.roffset) 649 return 0, f.err 650 } 651 } 652 if n <= f.nb { 653 f.b >>= n 654 f.nb -= n 655 return int(chunk >> huffmanValueShift), nil 656 } 657 } 658 } 659 660 // Flush any buffered output to the underlying writer. 
661 func (f *decompressor) flush(step func(*decompressor)) { 662 f.toRead = f.hist[f.hw:f.hp] 663 f.woffset += int64(f.hp - f.hw) 664 f.hw = f.hp 665 if f.hp == len(f.hist) { 666 f.hp = 0 667 f.hw = 0 668 f.hfull = true 669 } 670 f.step = step 671 } 672 673 func makeReader(r io.Reader) Reader { 674 if rr, ok := r.(Reader); ok { 675 return rr 676 } 677 return bufio.NewReader(r) 678 } 679 680 // NewReader returns a new ReadCloser that can be used 681 // to read the uncompressed version of r. It is the caller's 682 // responsibility to call Close on the ReadCloser when 683 // finished reading. 684 func NewReader(r io.Reader) io.ReadCloser { 685 var f decompressor 686 f.bits = new([maxLit + maxDist]int) 687 f.codebits = new([numCodes]int) 688 f.r = makeReader(r) 689 f.hist = new([maxHist]byte) 690 f.step = (*decompressor).nextBlock 691 return &f 692 } 693 694 // NewReaderDict is like NewReader but initializes the reader 695 // with a preset dictionary. The returned Reader behaves as if 696 // the uncompressed data stream started with the given dictionary, 697 // which has already been read. NewReaderDict is typically used 698 // to read data compressed by NewWriterDict. 699 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { 700 var f decompressor 701 f.r = makeReader(r) 702 f.hist = new([maxHist]byte) 703 f.bits = new([maxLit + maxDist]int) 704 f.codebits = new([numCodes]int) 705 f.step = (*decompressor).nextBlock 706 f.setDict(dict) 707 return &f 708 }