github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/snappy.go

github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/snappy.go (about)

     1  // Copyright 2019+ Klaus Post. All rights reserved.
     2  // License information can be found in the LICENSE file.
     3  // Based on work by Yann Collet, released under BSD License.
     4  
     5  package zstd
     6  
     7  import (
     8  	"encoding/binary"
     9  	"errors"
    10  	"hash/crc32"
    11  	"io"
    12  
    13  	"github.com/bir3/gocompiler/extra/compress/huff0"
    14  	snappy "github.com/bir3/gocompiler/extra/compress/internal/snapref"
    15  )
    16  
    17  const (
    18  	snappyTagLiteral = 0x00
    19  	snappyTagCopy1   = 0x01
    20  	snappyTagCopy2   = 0x02
    21  	snappyTagCopy4   = 0x03
    22  )
    23  
    24  const (
    25  	snappyChecksumSize = 4
    26  	snappyMagicBody    = "sNaPpY"
    27  
    28  	// snappyMaxBlockSize is the maximum size of the input to encodeBlock. It is not
    29  	// part of the wire format per se, but some parts of the encoder assume
    30  	// that an offset fits into a uint16.
    31  	//
    32  	// Also, for the framing format (Writer type instead of Encode function),
    33  	// https://github.com/google/snappy/blob/master/framing_format.txt says
    34  	// that "the uncompressed data in a chunk must be no longer than 65536
    35  	// bytes".
    36  	snappyMaxBlockSize = 65536
    37  
    38  	// snappyMaxEncodedLenOfMaxBlockSize equals MaxEncodedLen(snappyMaxBlockSize), but is
    39  	// hard coded to be a const instead of a variable, so that obufLen can also
    40  	// be a const. Their equivalence is confirmed by
    41  	// TestMaxEncodedLenOfMaxBlockSize.
    42  	snappyMaxEncodedLenOfMaxBlockSize = 76490
    43  )
    44  
    45  const (
    46  	chunkTypeCompressedData   = 0x00
    47  	chunkTypeUncompressedData = 0x01
    48  	chunkTypePadding          = 0xfe
    49  	chunkTypeStreamIdentifier = 0xff
    50  )
    51  
    52  var (
    53  	// ErrSnappyCorrupt reports that the input is invalid.
    54  	ErrSnappyCorrupt = errors.New("snappy: corrupt input")
    55  	// ErrSnappyTooLarge reports that the uncompressed length is too large.
    56  	ErrSnappyTooLarge = errors.New("snappy: decoded block is too large")
    57  	// ErrSnappyUnsupported reports that the input isn't supported.
    58  	ErrSnappyUnsupported = errors.New("snappy: unsupported input")
    59  
    60  	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
    61  )
    62  
// SnappyConverter can read Snappy-compressed streams and convert them to zstd.
// Conversion is done by converting the stream directly from Snappy without intermediate
// full decoding.
// Therefore the compression ratio is much less than what can be done by a full decompression
// and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without
// any errors being generated.
// No CRC value is being generated and not all CRC values of the Snappy stream are checked.
// However, it provides really fast recompression of Snappy streams.
// The converter can be reused to avoid allocations, even after errors.
type SnappyConverter struct {
	// r is the Snappy input of the Convert call in progress.
	r io.Reader
	// err holds the most recent read or conversion error; Convert clears it on entry.
	err error
	// buf is scratch space for chunk headers and chunk payloads. Convert
	// (re)allocates it to snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize bytes.
	buf []byte
	// block is the block encoder, created on first use and reused across calls.
	block *blockEnc
}
    78  
    79  // Convert the Snappy stream supplied in 'in' and write the zStandard stream to 'w'.
    80  // If any error is detected on the Snappy stream it is returned.
    81  // The number of bytes written is returned.
    82  func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
    83  	initPredefined()
    84  	r.err = nil
    85  	r.r = in
    86  	if r.block == nil {
    87  		r.block = &blockEnc{}
    88  		r.block.init()
    89  	}
    90  	r.block.initNewEncode()
    91  	if len(r.buf) != snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize {
    92  		r.buf = make([]byte, snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize)
    93  	}
    94  	r.block.litEnc.Reuse = huff0.ReusePolicyNone
    95  	var written int64
    96  	var readHeader bool
    97  	{
    98  		var header []byte
    99  		var n int
   100  		header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
   101  
   102  		n, r.err = w.Write(header)
   103  		if r.err != nil {
   104  			return written, r.err
   105  		}
   106  		written += int64(n)
   107  	}
   108  
   109  	for {
   110  		if !r.readFull(r.buf[:4], true) {
   111  			// Add empty last block
   112  			r.block.reset(nil)
   113  			r.block.last = true
   114  			err := r.block.encodeLits(r.block.literals, false)
   115  			if err != nil {
   116  				return written, err
   117  			}
   118  			n, err := w.Write(r.block.output)
   119  			if err != nil {
   120  				return written, err
   121  			}
   122  			written += int64(n)
   123  
   124  			return written, r.err
   125  		}
   126  		chunkType := r.buf[0]
   127  		if !readHeader {
   128  			if chunkType != chunkTypeStreamIdentifier {
   129  				println("chunkType != chunkTypeStreamIdentifier", chunkType)
   130  				r.err = ErrSnappyCorrupt
   131  				return written, r.err
   132  			}
   133  			readHeader = true
   134  		}
   135  		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
   136  		if chunkLen > len(r.buf) {
   137  			println("chunkLen > len(r.buf)", chunkType)
   138  			r.err = ErrSnappyUnsupported
   139  			return written, r.err
   140  		}
   141  
   142  		// The chunk types are specified at
   143  		// https://github.com/google/snappy/blob/master/framing_format.txt
   144  		switch chunkType {
   145  		case chunkTypeCompressedData:
   146  			// Section 4.2. Compressed data (chunk type 0x00).
   147  			if chunkLen < snappyChecksumSize {
   148  				println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize)
   149  				r.err = ErrSnappyCorrupt
   150  				return written, r.err
   151  			}
   152  			buf := r.buf[:chunkLen]
   153  			if !r.readFull(buf, false) {
   154  				return written, r.err
   155  			}
   156  			//checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
   157  			buf = buf[snappyChecksumSize:]
   158  
   159  			n, hdr, err := snappyDecodedLen(buf)
   160  			if err != nil {
   161  				r.err = err
   162  				return written, r.err
   163  			}
   164  			buf = buf[hdr:]
   165  			if n > snappyMaxBlockSize {
   166  				println("n > snappyMaxBlockSize", n, snappyMaxBlockSize)
   167  				r.err = ErrSnappyCorrupt
   168  				return written, r.err
   169  			}
   170  			r.block.reset(nil)
   171  			r.block.pushOffsets()
   172  			if err := decodeSnappy(r.block, buf); err != nil {
   173  				r.err = err
   174  				return written, r.err
   175  			}
   176  			if r.block.size+r.block.extraLits != n {
   177  				printf("invalid size, want %d, got %d\n", n, r.block.size+r.block.extraLits)
   178  				r.err = ErrSnappyCorrupt
   179  				return written, r.err
   180  			}
   181  			err = r.block.encode(nil, false, false)
   182  			switch err {
   183  			case errIncompressible:
   184  				r.block.popOffsets()
   185  				r.block.reset(nil)
   186  				r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen])
   187  				if err != nil {
   188  					return written, err
   189  				}
   190  				err = r.block.encodeLits(r.block.literals, false)
   191  				if err != nil {
   192  					return written, err
   193  				}
   194  			case nil:
   195  			default:
   196  				return written, err
   197  			}
   198  
   199  			n, r.err = w.Write(r.block.output)
   200  			if r.err != nil {
   201  				return written, err
   202  			}
   203  			written += int64(n)
   204  			continue
   205  		case chunkTypeUncompressedData:
   206  			if debugEncoder {
   207  				println("Uncompressed, chunklen", chunkLen)
   208  			}
   209  			// Section 4.3. Uncompressed data (chunk type 0x01).
   210  			if chunkLen < snappyChecksumSize {
   211  				println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize)
   212  				r.err = ErrSnappyCorrupt
   213  				return written, r.err
   214  			}
   215  			r.block.reset(nil)
   216  			buf := r.buf[:snappyChecksumSize]
   217  			if !r.readFull(buf, false) {
   218  				return written, r.err
   219  			}
   220  			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
   221  			// Read directly into r.decoded instead of via r.buf.
   222  			n := chunkLen - snappyChecksumSize
   223  			if n > snappyMaxBlockSize {
   224  				println("n > snappyMaxBlockSize", n, snappyMaxBlockSize)
   225  				r.err = ErrSnappyCorrupt
   226  				return written, r.err
   227  			}
   228  			r.block.literals = r.block.literals[:n]
   229  			if !r.readFull(r.block.literals, false) {
   230  				return written, r.err
   231  			}
   232  			if snappyCRC(r.block.literals) != checksum {
   233  				println("literals crc mismatch")
   234  				r.err = ErrSnappyCorrupt
   235  				return written, r.err
   236  			}
   237  			err := r.block.encodeLits(r.block.literals, false)
   238  			if err != nil {
   239  				return written, err
   240  			}
   241  			n, r.err = w.Write(r.block.output)
   242  			if r.err != nil {
   243  				return written, err
   244  			}
   245  			written += int64(n)
   246  			continue
   247  
   248  		case chunkTypeStreamIdentifier:
   249  			if debugEncoder {
   250  				println("stream id", chunkLen, len(snappyMagicBody))
   251  			}
   252  			// Section 4.1. Stream identifier (chunk type 0xff).
   253  			if chunkLen != len(snappyMagicBody) {
   254  				println("chunkLen != len(snappyMagicBody)", chunkLen, len(snappyMagicBody))
   255  				r.err = ErrSnappyCorrupt
   256  				return written, r.err
   257  			}
   258  			if !r.readFull(r.buf[:len(snappyMagicBody)], false) {
   259  				return written, r.err
   260  			}
   261  			for i := 0; i < len(snappyMagicBody); i++ {
   262  				if r.buf[i] != snappyMagicBody[i] {
   263  					println("r.buf[i] != snappyMagicBody[i]", r.buf[i], snappyMagicBody[i], i)
   264  					r.err = ErrSnappyCorrupt
   265  					return written, r.err
   266  				}
   267  			}
   268  			continue
   269  		}
   270  
   271  		if chunkType <= 0x7f {
   272  			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
   273  			println("chunkType <= 0x7f")
   274  			r.err = ErrSnappyUnsupported
   275  			return written, r.err
   276  		}
   277  		// Section 4.4 Padding (chunk type 0xfe).
   278  		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
   279  		if !r.readFull(r.buf[:chunkLen], false) {
   280  			return written, r.err
   281  		}
   282  	}
   283  }
   284  
   285  // decodeSnappy writes the decoding of src to dst. It assumes that the varint-encoded
   286  // length of the decompressed bytes has already been read.
   287  func decodeSnappy(blk *blockEnc, src []byte) error {
   288  	//decodeRef(make([]byte, snappyMaxBlockSize), src)
   289  	var s, length int
   290  	lits := blk.extraLits
   291  	var offset uint32
   292  	for s < len(src) {
   293  		switch src[s] & 0x03 {
   294  		case snappyTagLiteral:
   295  			x := uint32(src[s] >> 2)
   296  			switch {
   297  			case x < 60:
   298  				s++
   299  			case x == 60:
   300  				s += 2
   301  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
   302  					println("uint(s) > uint(len(src)", s, src)
   303  					return ErrSnappyCorrupt
   304  				}
   305  				x = uint32(src[s-1])
   306  			case x == 61:
   307  				s += 3
   308  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
   309  					println("uint(s) > uint(len(src)", s, src)
   310  					return ErrSnappyCorrupt
   311  				}
   312  				x = uint32(src[s-2]) | uint32(src[s-1])<<8
   313  			case x == 62:
   314  				s += 4
   315  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
   316  					println("uint(s) > uint(len(src)", s, src)
   317  					return ErrSnappyCorrupt
   318  				}
   319  				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
   320  			case x == 63:
   321  				s += 5
   322  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
   323  					println("uint(s) > uint(len(src)", s, src)
   324  					return ErrSnappyCorrupt
   325  				}
   326  				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
   327  			}
   328  			if x > snappyMaxBlockSize {
   329  				println("x > snappyMaxBlockSize", x, snappyMaxBlockSize)
   330  				return ErrSnappyCorrupt
   331  			}
   332  			length = int(x) + 1
   333  			if length <= 0 {
   334  				println("length <= 0 ", length)
   335  
   336  				return errUnsupportedLiteralLength
   337  			}
   338  			//if length > snappyMaxBlockSize-d || uint32(length) > len(src)-s {
   339  			//	return ErrSnappyCorrupt
   340  			//}
   341  
   342  			blk.literals = append(blk.literals, src[s:s+length]...)
   343  			//println(length, "litLen")
   344  			lits += length
   345  			s += length
   346  			continue
   347  
   348  		case snappyTagCopy1:
   349  			s += 2
   350  			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
   351  				println("uint(s) > uint(len(src)", s, len(src))
   352  				return ErrSnappyCorrupt
   353  			}
   354  			length = 4 + int(src[s-2])>>2&0x7
   355  			offset = uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])
   356  
   357  		case snappyTagCopy2:
   358  			s += 3
   359  			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
   360  				println("uint(s) > uint(len(src)", s, len(src))
   361  				return ErrSnappyCorrupt
   362  			}
   363  			length = 1 + int(src[s-3])>>2
   364  			offset = uint32(src[s-2]) | uint32(src[s-1])<<8
   365  
   366  		case snappyTagCopy4:
   367  			s += 5
   368  			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
   369  				println("uint(s) > uint(len(src)", s, len(src))
   370  				return ErrSnappyCorrupt
   371  			}
   372  			length = 1 + int(src[s-5])>>2
   373  			offset = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
   374  		}
   375  
   376  		if offset <= 0 || blk.size+lits < int(offset) /*|| length > len(blk)-d */ {
   377  			println("offset <= 0 || blk.size+lits < int(offset)", offset, blk.size+lits, int(offset), blk.size, lits)
   378  
   379  			return ErrSnappyCorrupt
   380  		}
   381  
   382  		// Check if offset is one of the recent offsets.
   383  		// Adjusts the output offset accordingly.
   384  		// Gives a tiny bit of compression, typically around 1%.
   385  		if false {
   386  			offset = blk.matchOffset(offset, uint32(lits))
   387  		} else {
   388  			offset += 3
   389  		}
   390  
   391  		blk.sequences = append(blk.sequences, seq{
   392  			litLen:   uint32(lits),
   393  			offset:   offset,
   394  			matchLen: uint32(length) - zstdMinMatch,
   395  		})
   396  		blk.size += length + lits
   397  		lits = 0
   398  	}
   399  	blk.extraLits = lits
   400  	return nil
   401  }
   402  
   403  func (r *SnappyConverter) readFull(p []byte, allowEOF bool) (ok bool) {
   404  	if _, r.err = io.ReadFull(r.r, p); r.err != nil {
   405  		if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
   406  			r.err = ErrSnappyCorrupt
   407  		}
   408  		return false
   409  	}
   410  	return true
   411  }
   412  
   413  var crcTable = crc32.MakeTable(crc32.Castagnoli)
   414  
   415  // crc implements the checksum specified in section 3 of
   416  // https://github.com/google/snappy/blob/master/framing_format.txt
   417  func snappyCRC(b []byte) uint32 {
   418  	c := crc32.Update(0, crcTable, b)
   419  	return c>>15 | c<<17 + 0xa282ead8
   420  }
   421  
   422  // snappyDecodedLen returns the length of the decoded block and the number of bytes
   423  // that the length header occupied.
   424  func snappyDecodedLen(src []byte) (blockLen, headerLen int, err error) {
   425  	v, n := binary.Uvarint(src)
   426  	if n <= 0 || v > 0xffffffff {
   427  		return 0, 0, ErrSnappyCorrupt
   428  	}
   429  
   430  	const wordSize = 32 << (^uint(0) >> 32 & 1)
   431  	if wordSize == 32 && v > 0x7fffffff {
   432  		return 0, 0, ErrSnappyTooLarge
   433  	}
   434  	return int(v), n, nil
   435  }