github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/compress/flate/inflate.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run gen.go -output fixedhuff.go 6 7 // Package flate implements the DEFLATE compressed data format, described in 8 // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file 9 // formats. 10 package flate 11 12 import ( 13 "bufio" 14 "io" 15 "strconv" 16 ) 17 18 const ( 19 maxCodeLen = 16 // max length of Huffman code 20 maxHist = 32768 // max history required 21 // The next three numbers come from the RFC, section 3.2.7. 22 maxLit = 286 23 maxDist = 32 24 numCodes = 19 // number of codes in Huffman meta-code 25 ) 26 27 // A CorruptInputError reports the presence of corrupt input at a given offset. 28 type CorruptInputError int64 29 30 func (e CorruptInputError) Error() string { 31 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) 32 } 33 34 // An InternalError reports an error in the flate code itself. 35 type InternalError string 36 37 func (e InternalError) Error() string { return "flate: internal error: " + string(e) } 38 39 // A ReadError reports an error encountered while reading input. 40 type ReadError struct { 41 Offset int64 // byte offset where error occurred 42 Err error // error returned by underlying Read 43 } 44 45 func (e *ReadError) Error() string { 46 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() 47 } 48 49 // A WriteError reports an error encountered while writing output. 
type WriteError struct {
	Offset int64 // byte offset where error occurred
	Err    error // error returned by underlying Write
}

// Error combines the decimal offset with the message of the wrapped error.
// Err must be non-nil: its Error method is called unconditionally.
func (e *WriteError) Error() string {
	offset := strconv.FormatInt(e.Offset, 10)
	return "flate: write error at offset " + offset + ": " + e.Err.Error()
}

// Resetter resets a ReadCloser returned by NewReader or NewReaderDict
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
	// Reset discards any buffered data and resets the Resetter as if it was
	// newly initialized with the given reader.
	Reset(r io.Reader, dict []byte) error
}

// Note that much of the implementation of huffmanDecoder is also copied
// into gen.go (in package main) for the purpose of precomputing the
// fixed huffman tables so they can be included statically.

// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.

// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
85 86 // chunk & 15 is number of bits 87 // chunk >> 4 is value, including table link 88 89 const ( 90 huffmanChunkBits = 9 91 huffmanNumChunks = 1 << huffmanChunkBits 92 huffmanCountMask = 15 93 huffmanValueShift = 4 94 ) 95 96 type huffmanDecoder struct { 97 min int // the minimum code length 98 chunks [huffmanNumChunks]uint32 // chunks as described above 99 links [][]uint32 // overflow links 100 linkMask uint32 // mask the width of the link table 101 } 102 103 // Initialize Huffman decoding tables from array of code lengths. 104 func (h *huffmanDecoder) init(bits []int) bool { 105 if h.min != 0 { 106 *h = huffmanDecoder{} 107 } 108 109 // Count number of codes of each length, 110 // compute min and max length. 111 var count [maxCodeLen]int 112 var min, max int 113 for _, n := range bits { 114 if n == 0 { 115 continue 116 } 117 if min == 0 || n < min { 118 min = n 119 } 120 if n > max { 121 max = n 122 } 123 count[n]++ 124 } 125 if max == 0 { 126 return false 127 } 128 129 h.min = min 130 var linkBits uint 131 var numLinks int 132 if max > huffmanChunkBits { 133 linkBits = uint(max) - huffmanChunkBits 134 numLinks = 1 << linkBits 135 h.linkMask = uint32(numLinks - 1) 136 } 137 code := 0 138 var nextcode [maxCodeLen]int 139 for i := min; i <= max; i++ { 140 if i == huffmanChunkBits+1 { 141 // create link tables 142 link := code >> 1 143 if huffmanNumChunks < link { 144 return false 145 } 146 h.links = make([][]uint32, huffmanNumChunks-link) 147 for j := uint(link); j < huffmanNumChunks; j++ { 148 reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 149 reverse >>= uint(16 - huffmanChunkBits) 150 off := j - uint(link) 151 h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i)) 152 h.links[off] = make([]uint32, 1<<linkBits) 153 } 154 } 155 n := count[i] 156 nextcode[i] = code 157 code += n 158 code <<= 1 159 } 160 161 for i, n := range bits { 162 if n == 0 { 163 continue 164 } 165 code := nextcode[n] 166 nextcode[n]++ 167 chunk := 
uint32(i<<huffmanValueShift | n) 168 reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8 169 reverse >>= uint(16 - n) 170 if n <= huffmanChunkBits { 171 for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { 172 h.chunks[off] = chunk 173 } 174 } else { 175 value := h.chunks[reverse&(huffmanNumChunks-1)] >> huffmanValueShift 176 if value >= uint32(len(h.links)) { 177 return false 178 } 179 linktab := h.links[value] 180 reverse >>= huffmanChunkBits 181 for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { 182 linktab[off] = chunk 183 } 184 } 185 } 186 return true 187 } 188 189 // The actual read interface needed by NewReader. 190 // If the passed in io.Reader does not also have ReadByte, 191 // the NewReader will introduce its own buffering. 192 type Reader interface { 193 io.Reader 194 io.ByteReader 195 } 196 197 // Decompress state. 198 type decompressor struct { 199 // Input source. 200 r Reader 201 roffset int64 202 woffset int64 203 204 // Input bits, in top of b. 205 b uint32 206 nb uint 207 208 // Huffman decoders for literal/length, distance. 209 h1, h2 huffmanDecoder 210 211 // Length arrays used to define Huffman codes. 212 bits *[maxLit + maxDist]int 213 codebits *[numCodes]int 214 215 // Output history, buffer. 216 hist *[maxHist]byte 217 hp int // current output position in buffer 218 hw int // have written hist[0:hw] already 219 hfull bool // buffer has filled at least once 220 221 // Temporary buffer (avoids repeated allocation). 222 buf [4]byte 223 224 // Next step in the decompression, 225 // and decompression state. 
226 step func(*decompressor) 227 final bool 228 err error 229 toRead []byte 230 hl, hd *huffmanDecoder 231 copyLen int 232 copyDist int 233 } 234 235 func (f *decompressor) nextBlock() { 236 if f.final { 237 if f.hw != f.hp { 238 f.flush((*decompressor).nextBlock) 239 return 240 } 241 f.err = io.EOF 242 return 243 } 244 for f.nb < 1+2 { 245 if f.err = f.moreBits(); f.err != nil { 246 return 247 } 248 } 249 f.final = f.b&1 == 1 250 f.b >>= 1 251 typ := f.b & 3 252 f.b >>= 2 253 f.nb -= 1 + 2 254 switch typ { 255 case 0: 256 f.dataBlock() 257 case 1: 258 // compressed, fixed Huffman tables 259 f.hl = &fixedHuffmanDecoder 260 f.hd = nil 261 f.huffmanBlock() 262 case 2: 263 // compressed, dynamic Huffman tables 264 if f.err = f.readHuffman(); f.err != nil { 265 break 266 } 267 f.hl = &f.h1 268 f.hd = &f.h2 269 f.huffmanBlock() 270 default: 271 // 3 is reserved. 272 f.err = CorruptInputError(f.roffset) 273 } 274 } 275 276 func (f *decompressor) Read(b []byte) (int, error) { 277 for { 278 if len(f.toRead) > 0 { 279 n := copy(b, f.toRead) 280 f.toRead = f.toRead[n:] 281 return n, nil 282 } 283 if f.err != nil { 284 return 0, f.err 285 } 286 f.step(f) 287 } 288 } 289 290 func (f *decompressor) Close() error { 291 if f.err == io.EOF { 292 return nil 293 } 294 return f.err 295 } 296 297 // RFC 1951 section 3.2.7. 298 // Compression with dynamic Huffman codes 299 300 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 301 302 func (f *decompressor) readHuffman() error { 303 // HLIT[5], HDIST[5], HCLEN[4]. 304 for f.nb < 5+5+4 { 305 if err := f.moreBits(); err != nil { 306 return err 307 } 308 } 309 nlit := int(f.b&0x1F) + 257 310 if nlit > maxLit { 311 return CorruptInputError(f.roffset) 312 } 313 f.b >>= 5 314 ndist := int(f.b&0x1F) + 1 315 // maxDist is 32, so ndist is always valid. 316 f.b >>= 5 317 nclen := int(f.b&0xF) + 4 318 // numCodes is 19, so nclen is always valid. 
319 f.b >>= 4 320 f.nb -= 5 + 5 + 4 321 322 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. 323 for i := 0; i < nclen; i++ { 324 for f.nb < 3 { 325 if err := f.moreBits(); err != nil { 326 return err 327 } 328 } 329 f.codebits[codeOrder[i]] = int(f.b & 0x7) 330 f.b >>= 3 331 f.nb -= 3 332 } 333 for i := nclen; i < len(codeOrder); i++ { 334 f.codebits[codeOrder[i]] = 0 335 } 336 if !f.h1.init(f.codebits[0:]) { 337 return CorruptInputError(f.roffset) 338 } 339 340 // HLIT + 257 code lengths, HDIST + 1 code lengths, 341 // using the code length Huffman code. 342 for i, n := 0, nlit+ndist; i < n; { 343 x, err := f.huffSym(&f.h1) 344 if err != nil { 345 return err 346 } 347 if x < 16 { 348 // Actual length. 349 f.bits[i] = x 350 i++ 351 continue 352 } 353 // Repeat previous length or zero. 354 var rep int 355 var nb uint 356 var b int 357 switch x { 358 default: 359 return InternalError("unexpected length code") 360 case 16: 361 rep = 3 362 nb = 2 363 if i == 0 { 364 return CorruptInputError(f.roffset) 365 } 366 b = f.bits[i-1] 367 case 17: 368 rep = 3 369 nb = 3 370 b = 0 371 case 18: 372 rep = 11 373 nb = 7 374 b = 0 375 } 376 for f.nb < nb { 377 if err := f.moreBits(); err != nil { 378 return err 379 } 380 } 381 rep += int(f.b & uint32(1<<nb-1)) 382 f.b >>= nb 383 f.nb -= nb 384 if i+rep > n { 385 return CorruptInputError(f.roffset) 386 } 387 for j := 0; j < rep; j++ { 388 f.bits[i] = b 389 i++ 390 } 391 } 392 393 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { 394 return CorruptInputError(f.roffset) 395 } 396 397 return nil 398 } 399 400 // Decode a single Huffman block from f. 401 // hl and hd are the Huffman states for the lit/length values 402 // and the distance values, respectively. If hd == nil, using the 403 // fixed distance encoding associated with fixed Huffman blocks. 
404 func (f *decompressor) huffmanBlock() { 405 for { 406 v, err := f.huffSym(f.hl) 407 if err != nil { 408 f.err = err 409 return 410 } 411 var n uint // number of bits extra 412 var length int 413 switch { 414 case v < 256: 415 f.hist[f.hp] = byte(v) 416 f.hp++ 417 if f.hp == len(f.hist) { 418 // After the flush, continue this loop. 419 f.flush((*decompressor).huffmanBlock) 420 return 421 } 422 continue 423 case v == 256: 424 // Done with huffman block; read next block. 425 f.step = (*decompressor).nextBlock 426 return 427 // otherwise, reference to older data 428 case v < 265: 429 length = v - (257 - 3) 430 n = 0 431 case v < 269: 432 length = v*2 - (265*2 - 11) 433 n = 1 434 case v < 273: 435 length = v*4 - (269*4 - 19) 436 n = 2 437 case v < 277: 438 length = v*8 - (273*8 - 35) 439 n = 3 440 case v < 281: 441 length = v*16 - (277*16 - 67) 442 n = 4 443 case v < 285: 444 length = v*32 - (281*32 - 131) 445 n = 5 446 default: 447 length = 258 448 n = 0 449 } 450 if n > 0 { 451 for f.nb < n { 452 if err = f.moreBits(); err != nil { 453 f.err = err 454 return 455 } 456 } 457 length += int(f.b & uint32(1<<n-1)) 458 f.b >>= n 459 f.nb -= n 460 } 461 462 var dist int 463 if f.hd == nil { 464 for f.nb < 5 { 465 if err = f.moreBits(); err != nil { 466 f.err = err 467 return 468 } 469 } 470 dist = int(reverseByte[(f.b&0x1F)<<3]) 471 f.b >>= 5 472 f.nb -= 5 473 } else { 474 if dist, err = f.huffSym(f.hd); err != nil { 475 f.err = err 476 return 477 } 478 } 479 480 switch { 481 case dist < 4: 482 dist++ 483 case dist >= 30: 484 f.err = CorruptInputError(f.roffset) 485 return 486 default: 487 nb := uint(dist-2) >> 1 488 // have 1 bit in bottom of dist, need nb more. 489 extra := (dist & 1) << nb 490 for f.nb < nb { 491 if err = f.moreBits(); err != nil { 492 f.err = err 493 return 494 } 495 } 496 extra |= int(f.b & uint32(1<<nb-1)) 497 f.b >>= nb 498 f.nb -= nb 499 dist = 1<<(nb+1) + 1 + extra 500 } 501 502 // Copy history[-dist:-dist+length] into output. 
503 if dist > len(f.hist) { 504 f.err = InternalError("bad history distance") 505 return 506 } 507 508 // No check on length; encoding can be prescient. 509 if !f.hfull && dist > f.hp { 510 f.err = CorruptInputError(f.roffset) 511 return 512 } 513 514 f.copyLen, f.copyDist = length, dist 515 if f.copyHist() { 516 return 517 } 518 } 519 } 520 521 // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. 522 // It reports whether the f.hist buffer is full. 523 func (f *decompressor) copyHist() bool { 524 p := f.hp - f.copyDist 525 if p < 0 { 526 p += len(f.hist) 527 } 528 for f.copyLen > 0 { 529 n := f.copyLen 530 if x := len(f.hist) - f.hp; n > x { 531 n = x 532 } 533 if x := len(f.hist) - p; n > x { 534 n = x 535 } 536 forwardCopy(f.hist[:], f.hp, p, n) 537 p += n 538 f.hp += n 539 f.copyLen -= n 540 if f.hp == len(f.hist) { 541 // After flush continue copying out of history. 542 f.flush((*decompressor).copyHuff) 543 return true 544 } 545 if p == len(f.hist) { 546 p = 0 547 } 548 } 549 return false 550 } 551 552 func (f *decompressor) copyHuff() { 553 if f.copyHist() { 554 return 555 } 556 f.huffmanBlock() 557 } 558 559 // Copy a single uncompressed data block from input to output. 560 func (f *decompressor) dataBlock() { 561 // Uncompressed. 562 // Discard current half-byte. 563 f.nb = 0 564 f.b = 0 565 566 // Length then ones-complement of length. 567 nr, err := io.ReadFull(f.r, f.buf[0:4]) 568 f.roffset += int64(nr) 569 if err != nil { 570 f.err = &ReadError{f.roffset, err} 571 return 572 } 573 n := int(f.buf[0]) | int(f.buf[1])<<8 574 nn := int(f.buf[2]) | int(f.buf[3])<<8 575 if uint16(nn) != uint16(^n) { 576 f.err = CorruptInputError(f.roffset) 577 return 578 } 579 580 if n == 0 { 581 // 0-length block means sync 582 f.flush((*decompressor).nextBlock) 583 return 584 } 585 586 f.copyLen = n 587 f.copyData() 588 } 589 590 // copyData copies f.copyLen bytes from the underlying reader into f.hist. 
591 // It pauses for reads when f.hist is full. 592 func (f *decompressor) copyData() { 593 n := f.copyLen 594 for n > 0 { 595 m := len(f.hist) - f.hp 596 if m > n { 597 m = n 598 } 599 m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) 600 f.roffset += int64(m) 601 if err != nil { 602 f.err = &ReadError{f.roffset, err} 603 return 604 } 605 n -= m 606 f.hp += m 607 if f.hp == len(f.hist) { 608 f.copyLen = n 609 f.flush((*decompressor).copyData) 610 return 611 } 612 } 613 f.step = (*decompressor).nextBlock 614 } 615 616 func (f *decompressor) setDict(dict []byte) { 617 if len(dict) > len(f.hist) { 618 // Will only remember the tail. 619 dict = dict[len(dict)-len(f.hist):] 620 } 621 622 f.hp = copy(f.hist[:], dict) 623 if f.hp == len(f.hist) { 624 f.hp = 0 625 f.hfull = true 626 } 627 f.hw = f.hp 628 } 629 630 func (f *decompressor) moreBits() error { 631 c, err := f.r.ReadByte() 632 if err != nil { 633 if err == io.EOF { 634 err = io.ErrUnexpectedEOF 635 } 636 return err 637 } 638 f.roffset++ 639 f.b |= uint32(c) << f.nb 640 f.nb += 8 641 return nil 642 } 643 644 // Read the next Huffman-encoded symbol from f according to h. 645 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { 646 n := uint(h.min) 647 for { 648 for f.nb < n { 649 if err := f.moreBits(); err != nil { 650 return 0, err 651 } 652 } 653 chunk := h.chunks[f.b&(huffmanNumChunks-1)] 654 n = uint(chunk & huffmanCountMask) 655 if n > huffmanChunkBits { 656 chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] 657 n = uint(chunk & huffmanCountMask) 658 if n == 0 { 659 f.err = CorruptInputError(f.roffset) 660 return 0, f.err 661 } 662 } 663 if n <= f.nb { 664 f.b >>= n 665 f.nb -= n 666 return int(chunk >> huffmanValueShift), nil 667 } 668 } 669 } 670 671 // Flush any buffered output to the underlying writer. 
672 func (f *decompressor) flush(step func(*decompressor)) { 673 f.toRead = f.hist[f.hw:f.hp] 674 f.woffset += int64(f.hp - f.hw) 675 f.hw = f.hp 676 if f.hp == len(f.hist) { 677 f.hp = 0 678 f.hw = 0 679 f.hfull = true 680 } 681 f.step = step 682 } 683 684 func makeReader(r io.Reader) Reader { 685 if rr, ok := r.(Reader); ok { 686 return rr 687 } 688 return bufio.NewReader(r) 689 } 690 691 func (f *decompressor) Reset(r io.Reader, dict []byte) error { 692 *f = decompressor{ 693 r: makeReader(r), 694 bits: f.bits, 695 codebits: f.codebits, 696 hist: f.hist, 697 step: (*decompressor).nextBlock, 698 } 699 if dict != nil { 700 f.setDict(dict) 701 } 702 return nil 703 } 704 705 // NewReader returns a new ReadCloser that can be used 706 // to read the uncompressed version of r. 707 // If r does not also implement io.ByteReader, 708 // the decompressor may read more data than necessary from r. 709 // It is the caller's responsibility to call Close on the ReadCloser 710 // when finished reading. 711 // 712 // The ReadCloser returned by NewReader also implements Resetter. 713 func NewReader(r io.Reader) io.ReadCloser { 714 var f decompressor 715 f.bits = new([maxLit + maxDist]int) 716 f.codebits = new([numCodes]int) 717 f.r = makeReader(r) 718 f.hist = new([maxHist]byte) 719 f.step = (*decompressor).nextBlock 720 return &f 721 } 722 723 // NewReaderDict is like NewReader but initializes the reader 724 // with a preset dictionary. The returned Reader behaves as if 725 // the uncompressed data stream started with the given dictionary, 726 // which has already been read. NewReaderDict is typically used 727 // to read data compressed by NewWriterDict. 728 // 729 // The ReadCloser returned by NewReader also implements Resetter. 
730 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { 731 var f decompressor 732 f.r = makeReader(r) 733 f.hist = new([maxHist]byte) 734 f.bits = new([maxLit + maxDist]int) 735 f.codebits = new([numCodes]int) 736 f.step = (*decompressor).nextBlock 737 f.setDict(dict) 738 return &f 739 }