github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/compress/snappy/decode.go (about)

     1  // Copyright 2011 The Snappy-Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package snappy
     6  
     7  import (
     8  	"encoding/binary"
     9  	"errors"
    10  	"io"
    11  )
    12  
    13  var (
    14  	// ErrCorrupt reports that the input is invalid.
    15  	ErrCorrupt = errors.New("snappy: corrupt input")
    16  	// ErrTooLarge reports that the uncompressed length is too large.
    17  	ErrTooLarge = errors.New("snappy: decoded block is too large")
    18  	// ErrUnsupported reports that the input isn't supported.
    19  	ErrUnsupported = errors.New("snappy: unsupported input")
    20  
    21  	errUnsupportedCopy4Tag      = errors.New("snappy: unsupported COPY_4 tag")
    22  	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
    23  )
    24  
    25  // DecodedLen returns the length of the decoded block.
    26  func DecodedLen(src []byte) (int, error) {
    27  	v, _, err := decodedLen(src)
    28  	return v, err
    29  }
    30  
    31  // decodedLen returns the length of the decoded block and the number of bytes
    32  // that the length header occupied.
    33  func decodedLen(src []byte) (blockLen, headerLen int, err error) {
    34  	v, n := binary.Uvarint(src)
    35  	if n <= 0 || v > 0xffffffff {
    36  		return 0, 0, ErrCorrupt
    37  	}
    38  
    39  	const wordSize = 32 << (^uint(0) >> 32 & 1)
    40  	if wordSize == 32 && v > 0x7fffffff {
    41  		return 0, 0, ErrTooLarge
    42  	}
    43  	return int(v), n, nil
    44  }
    45  
    46  // Decode returns the decoded form of src. The returned slice may be a sub-
    47  // slice of dst if dst was large enough to hold the entire decoded block.
    48  // Otherwise, a newly allocated slice will be returned.
    49  // It is valid to pass a nil dst.
    50  func Decode(dst, src []byte) ([]byte, error) {
    51  	dLen, s, err := decodedLen(src)
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  	if len(dst) < dLen {
    56  		dst = make([]byte, dLen)
    57  	}
    58  
    59  	var d, offset, length int
    60  	for s < len(src) {
    61  		switch src[s] & 0x03 {
    62  		case tagLiteral:
    63  			x := uint(src[s] >> 2)
    64  			switch {
    65  			case x < 60:
    66  				s++
    67  			case x == 60:
    68  				s += 2
    69  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
    70  					return nil, ErrCorrupt
    71  				}
    72  				x = uint(src[s-1])
    73  			case x == 61:
    74  				s += 3
    75  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
    76  					return nil, ErrCorrupt
    77  				}
    78  				x = uint(src[s-2]) | uint(src[s-1])<<8
    79  			case x == 62:
    80  				s += 4
    81  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
    82  					return nil, ErrCorrupt
    83  				}
    84  				x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16
    85  			case x == 63:
    86  				s += 5
    87  				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
    88  					return nil, ErrCorrupt
    89  				}
    90  				x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24
    91  			}
    92  			// length always > 0
    93  			length = int(x + 1)
    94  			if length > len(dst)-d || length > len(src)-s {
    95  				return nil, ErrCorrupt
    96  			}
    97  			copy(dst[d:], src[s:s+length])
    98  			d += length
    99  			s += length
   100  			continue
   101  
   102  		case tagCopy1:
   103  			s += 2
   104  			if s > len(src) {
   105  				return nil, ErrCorrupt
   106  			}
   107  			length = 4 + int(src[s-2])>>2&0x7
   108  			offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
   109  
   110  		case tagCopy2:
   111  			s += 3
   112  			if s > len(src) {
   113  				return nil, ErrCorrupt
   114  			}
   115  			length = 1 + int(src[s-3])>>2
   116  			offset = int(src[s-2]) | int(src[s-1])<<8
   117  
   118  		case tagCopy4:
   119  			return nil, errUnsupportedCopy4Tag
   120  		}
   121  
   122  		if uint(offset-1) >= uint(d) || length > len(dst)-d {
   123  			return nil, ErrCorrupt
   124  		}
   125  		for end := d + length; d != end; d++ {
   126  			dst[d] = dst[d-offset]
   127  		}
   128  	}
   129  	if d != dLen {
   130  		return nil, ErrCorrupt
   131  	}
   132  	return dst[:d], nil
   133  }
   134  
   135  // NewReader returns a new Reader that decompresses from r, using the framing
   136  // format described at
   137  // https://yougam/libraries/google/snappy/blob/master/framing_format.txt
   138  func NewReader(r io.Reader) *Reader {
   139  	return &Reader{
   140  		r:       r,
   141  		decoded: make([]byte, maxUncompressedChunkLen),
   142  		buf:     make([]byte, maxEncodedLenOfMaxUncompressedChunkLen+checksumSize),
   143  	}
   144  }
   145  
   146  // Reader is an io.Reader that can read Snappy-compressed bytes.
   147  type Reader struct {
   148  	r       io.Reader
   149  	err     error
   150  	decoded []byte
   151  	buf     []byte
   152  	// decoded[i:j] contains decoded bytes that have not yet been passed on.
   153  	i, j       int
   154  	readHeader bool
   155  }
   156  
   157  // Reset discards any buffered data, resets all state, and switches the Snappy
   158  // reader to read from r. This permits reusing a Reader rather than allocating
   159  // a new one.
   160  func (r *Reader) Reset(reader io.Reader) {
   161  	r.r = reader
   162  	r.err = nil
   163  	r.i = 0
   164  	r.j = 0
   165  	r.readHeader = false
   166  }
   167  
   168  func (r *Reader) readFull(p []byte) (ok bool) {
   169  	if _, r.err = io.ReadFull(r.r, p); r.err != nil {
   170  		if r.err == io.ErrUnexpectedEOF {
   171  			r.err = ErrCorrupt
   172  		}
   173  		return false
   174  	}
   175  	return true
   176  }
   177  
   178  // Read satisfies the io.Reader interface.
   179  func (r *Reader) Read(p []byte) (int, error) {
   180  	if r.err != nil {
   181  		return 0, r.err
   182  	}
   183  	for {
   184  		if r.i < r.j {
   185  			n := copy(p, r.decoded[r.i:r.j])
   186  			r.i += n
   187  			return n, nil
   188  		}
   189  		if !r.readFull(r.buf[:4]) {
   190  			return 0, r.err
   191  		}
   192  		chunkType := r.buf[0]
   193  		if !r.readHeader {
   194  			if chunkType != chunkTypeStreamIdentifier {
   195  				r.err = ErrCorrupt
   196  				return 0, r.err
   197  			}
   198  			r.readHeader = true
   199  		}
   200  		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
   201  		if chunkLen > len(r.buf) {
   202  			r.err = ErrUnsupported
   203  			return 0, r.err
   204  		}
   205  
   206  		// The chunk types are specified at
   207  		// https://yougam/libraries/google/snappy/blob/master/framing_format.txt
   208  		switch chunkType {
   209  		case chunkTypeCompressedData:
   210  			// Section 4.2. Compressed data (chunk type 0x00).
   211  			if chunkLen < checksumSize {
   212  				r.err = ErrCorrupt
   213  				return 0, r.err
   214  			}
   215  			buf := r.buf[:chunkLen]
   216  			if !r.readFull(buf) {
   217  				return 0, r.err
   218  			}
   219  			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
   220  			buf = buf[checksumSize:]
   221  
   222  			n, err := DecodedLen(buf)
   223  			if err != nil {
   224  				r.err = err
   225  				return 0, r.err
   226  			}
   227  			if n > len(r.decoded) {
   228  				r.err = ErrCorrupt
   229  				return 0, r.err
   230  			}
   231  			if _, err := Decode(r.decoded, buf); err != nil {
   232  				r.err = err
   233  				return 0, r.err
   234  			}
   235  			if crc(r.decoded[:n]) != checksum {
   236  				r.err = ErrCorrupt
   237  				return 0, r.err
   238  			}
   239  			r.i, r.j = 0, n
   240  			continue
   241  
   242  		case chunkTypeUncompressedData:
   243  			// Section 4.3. Uncompressed data (chunk type 0x01).
   244  			if chunkLen < checksumSize {
   245  				r.err = ErrCorrupt
   246  				return 0, r.err
   247  			}
   248  			buf := r.buf[:checksumSize]
   249  			if !r.readFull(buf) {
   250  				return 0, r.err
   251  			}
   252  			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
   253  			// Read directly into r.decoded instead of via r.buf.
   254  			n := chunkLen - checksumSize
   255  			if !r.readFull(r.decoded[:n]) {
   256  				return 0, r.err
   257  			}
   258  			if crc(r.decoded[:n]) != checksum {
   259  				r.err = ErrCorrupt
   260  				return 0, r.err
   261  			}
   262  			r.i, r.j = 0, n
   263  			continue
   264  
   265  		case chunkTypeStreamIdentifier:
   266  			// Section 4.1. Stream identifier (chunk type 0xff).
   267  			if chunkLen != len(magicBody) {
   268  				r.err = ErrCorrupt
   269  				return 0, r.err
   270  			}
   271  			if !r.readFull(r.buf[:len(magicBody)]) {
   272  				return 0, r.err
   273  			}
   274  			for i := 0; i < len(magicBody); i++ {
   275  				if r.buf[i] != magicBody[i] {
   276  					r.err = ErrCorrupt
   277  					return 0, r.err
   278  				}
   279  			}
   280  			continue
   281  		}
   282  
   283  		if chunkType <= 0x7f {
   284  			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
   285  			r.err = ErrUnsupported
   286  			return 0, r.err
   287  		}
   288  		// Section 4.4 Padding (chunk type 0xfe).
   289  		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
   290  		if !r.readFull(r.buf[:chunkLen]) {
   291  			return 0, r.err
   292  		}
   293  	}
   294  }