github.com/AndrienkoAleksandr/go@v0.0.19/src/intern/zstd/literals.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package zstd
     6  
     7  import (
     8  	"encoding/binary"
     9  )
    10  
    11  // readLiterals reads and decompresses the literals from data at off.
    12  // The literals are appended to outbuf, which is returned.
    13  // Also returns the new input offset. RFC 3.1.1.3.1.
    14  func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error) {
    15  	if off >= len(data) {
    16  		return 0, nil, r.makeEOFError(off)
    17  	}
    18  
    19  	// Literals section header. RFC 3.1.1.3.1.1.
    20  	hdr := data[off]
    21  	off++
    22  
    23  	if (hdr&3) == 0 || (hdr&3) == 1 {
    24  		return r.readRawRLELiterals(data, off, hdr, outbuf)
    25  	} else {
    26  		return r.readHuffLiterals(data, off, hdr, outbuf)
    27  	}
    28  }
    29  
    30  // readRawRLELiterals reads and decompresses a Raw_Literals_Block or
    31  // a RLE_Literals_Block. RFC 3.1.1.3.1.1.
    32  func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
    33  	raw := (hdr & 3) == 0
    34  
    35  	var regeneratedSize int
    36  	switch (hdr >> 2) & 3 {
    37  	case 0, 2:
    38  		regeneratedSize = int(hdr >> 3)
    39  	case 1:
    40  		if off >= len(data) {
    41  			return 0, nil, r.makeEOFError(off)
    42  		}
    43  		regeneratedSize = int(hdr>>4) + (int(data[off]) << 4)
    44  		off++
    45  	case 3:
    46  		if off+1 >= len(data) {
    47  			return 0, nil, r.makeEOFError(off)
    48  		}
    49  		regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + (int(data[off+1]) << 12)
    50  		off += 2
    51  	}
    52  
    53  	// We are going to use the entire literal block in the output.
    54  	// The maximum size of one decompressed block is 128K,
    55  	// so we can't have more literals than that.
    56  	if regeneratedSize > 128<<10 {
    57  		return 0, nil, r.makeError(off, "literal size too large")
    58  	}
    59  
    60  	if raw {
    61  		// RFC 3.1.1.3.1.2.
    62  		if off+regeneratedSize > len(data) {
    63  			return 0, nil, r.makeError(off, "raw literal size too large")
    64  		}
    65  		outbuf = append(outbuf, data[off:off+regeneratedSize]...)
    66  		off += regeneratedSize
    67  	} else {
    68  		// RFC 3.1.1.3.1.3.
    69  		if off >= len(data) {
    70  			return 0, nil, r.makeError(off, "RLE literal missing")
    71  		}
    72  		rle := data[off]
    73  		off++
    74  		for i := 0; i < regeneratedSize; i++ {
    75  			outbuf = append(outbuf, rle)
    76  		}
    77  	}
    78  
    79  	return off, outbuf, nil
    80  }
    81  
    82  // readHuffLiterals reads and decompresses a Compressed_Literals_Block or
    83  // a Treeless_Literals_Block. RFC 3.1.1.3.1.4.
    84  func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
    85  	var (
    86  		regeneratedSize int
    87  		compressedSize  int
    88  		streams         int
    89  	)
    90  	switch (hdr >> 2) & 3 {
    91  	case 0, 1:
    92  		if off+1 >= len(data) {
    93  			return 0, nil, r.makeEOFError(off)
    94  		}
    95  		regeneratedSize = (int(hdr) >> 4) | ((int(data[off]) & 0x3f) << 4)
    96  		compressedSize = (int(data[off]) >> 6) | (int(data[off+1]) << 2)
    97  		off += 2
    98  		if ((hdr >> 2) & 3) == 0 {
    99  			streams = 1
   100  		} else {
   101  			streams = 4
   102  		}
   103  	case 2:
   104  		if off+2 >= len(data) {
   105  			return 0, nil, r.makeEOFError(off)
   106  		}
   107  		regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 3) << 12)
   108  		compressedSize = (int(data[off+1]) >> 2) | (int(data[off+2]) << 6)
   109  		off += 3
   110  		streams = 4
   111  	case 3:
   112  		if off+3 >= len(data) {
   113  			return 0, nil, r.makeEOFError(off)
   114  		}
   115  		regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 0x3f) << 12)
   116  		compressedSize = (int(data[off+1]) >> 6) | (int(data[off+2]) << 2) | (int(data[off+3]) << 10)
   117  		off += 4
   118  		streams = 4
   119  	}
   120  
   121  	// We are going to use the entire literal block in the output.
   122  	// The maximum size of one decompressed block is 128K,
   123  	// so we can't have more literals than that.
   124  	if regeneratedSize > 128<<10 {
   125  		return 0, nil, r.makeError(off, "literal size too large")
   126  	}
   127  
   128  	roff := off + compressedSize
   129  	if roff > len(data) || roff < 0 {
   130  		return 0, nil, r.makeEOFError(off)
   131  	}
   132  
   133  	totalStreamsSize := compressedSize
   134  	if (hdr & 3) == 2 {
   135  		// Compressed_Literals_Block.
   136  		// Read new huffman tree.
   137  
   138  		if len(r.huffmanTable) < 1<<maxHuffmanBits {
   139  			r.huffmanTable = make([]uint16, 1<<maxHuffmanBits)
   140  		}
   141  
   142  		huffmanTableBits, hoff, err := r.readHuff(data, off, r.huffmanTable)
   143  		if err != nil {
   144  			return 0, nil, err
   145  		}
   146  		r.huffmanTableBits = huffmanTableBits
   147  
   148  		if totalStreamsSize < hoff-off {
   149  			return 0, nil, r.makeError(off, "Huffman table too big")
   150  		}
   151  		totalStreamsSize -= hoff - off
   152  		off = hoff
   153  	} else {
   154  		// Treeless_Literals_Block
   155  		// Reuse previous Huffman tree.
   156  		if r.huffmanTableBits == 0 {
   157  			return 0, nil, r.makeError(off, "missing literals Huffman tree")
   158  		}
   159  	}
   160  
   161  	// Decompress compressedSize bytes of data at off using the
   162  	// Huffman tree.
   163  
   164  	var err error
   165  	if streams == 1 {
   166  		outbuf, err = r.readLiteralsOneStream(data, off, totalStreamsSize, regeneratedSize, outbuf)
   167  	} else {
   168  		outbuf, err = r.readLiteralsFourStreams(data, off, totalStreamsSize, regeneratedSize, outbuf)
   169  	}
   170  
   171  	if err != nil {
   172  		return 0, nil, err
   173  	}
   174  
   175  	return roff, outbuf, nil
   176  }
   177  
   178  // readLiteralsOneStream reads a single stream of compressed literals.
   179  func (r *Reader) readLiteralsOneStream(data block, off, compressedSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
   180  	// We let the reverse bit reader read earlier bytes,
   181  	// because the Huffman table ignores bits that it doesn't need.
   182  	rbr, err := r.makeReverseBitReader(data, off+compressedSize-1, off-2)
   183  	if err != nil {
   184  		return nil, err
   185  	}
   186  
   187  	huffTable := r.huffmanTable
   188  	huffBits := uint32(r.huffmanTableBits)
   189  	huffMask := (uint32(1) << huffBits) - 1
   190  
   191  	for i := 0; i < regeneratedSize; i++ {
   192  		if !rbr.fetch(uint8(huffBits)) {
   193  			return nil, rbr.makeError("literals Huffman stream out of bits")
   194  		}
   195  
   196  		var t uint16
   197  		idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
   198  		t = huffTable[idx]
   199  		outbuf = append(outbuf, byte(t>>8))
   200  		rbr.cnt -= uint32(t & 0xff)
   201  	}
   202  
   203  	return outbuf, nil
   204  }
   205  
   206  // readLiteralsFourStreams reads four interleaved streams of
   207  // compressed literals.
   208  func (r *Reader) readLiteralsFourStreams(data block, off, totalStreamsSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
   209  	// Read the jump table to find out where the streams are.
   210  	// RFC 3.1.1.3.1.6.
   211  	if off+5 >= len(data) {
   212  		return nil, r.makeEOFError(off)
   213  	}
   214  	if totalStreamsSize < 6 {
   215  		return nil, r.makeError(off, "total streams size too small for jump table")
   216  	}
   217  
   218  	streamSize1 := binary.LittleEndian.Uint16(data[off:])
   219  	streamSize2 := binary.LittleEndian.Uint16(data[off+2:])
   220  	streamSize3 := binary.LittleEndian.Uint16(data[off+4:])
   221  	off += 6
   222  
   223  	tot := uint64(streamSize1) + uint64(streamSize2) + uint64(streamSize3)
   224  	if tot > uint64(totalStreamsSize)-6 {
   225  		return nil, r.makeEOFError(off)
   226  	}
   227  	streamSize4 := uint32(totalStreamsSize) - 6 - uint32(tot)
   228  
   229  	off--
   230  	off1 := off + int(streamSize1)
   231  	start1 := off + 1
   232  
   233  	off2 := off1 + int(streamSize2)
   234  	start2 := off1 + 1
   235  
   236  	off3 := off2 + int(streamSize3)
   237  	start3 := off2 + 1
   238  
   239  	off4 := off3 + int(streamSize4)
   240  	start4 := off3 + 1
   241  
   242  	// We let the reverse bit readers read earlier bytes,
   243  	// because the Huffman tables ignore bits that they don't need.
   244  
   245  	rbr1, err := r.makeReverseBitReader(data, off1, start1-2)
   246  	if err != nil {
   247  		return nil, err
   248  	}
   249  
   250  	rbr2, err := r.makeReverseBitReader(data, off2, start2-2)
   251  	if err != nil {
   252  		return nil, err
   253  	}
   254  
   255  	rbr3, err := r.makeReverseBitReader(data, off3, start3-2)
   256  	if err != nil {
   257  		return nil, err
   258  	}
   259  
   260  	rbr4, err := r.makeReverseBitReader(data, off4, start4-2)
   261  	if err != nil {
   262  		return nil, err
   263  	}
   264  
   265  	regeneratedStreamSize := (regeneratedSize + 3) / 4
   266  
   267  	out1 := len(outbuf)
   268  	out2 := out1 + regeneratedStreamSize
   269  	out3 := out2 + regeneratedStreamSize
   270  	out4 := out3 + regeneratedStreamSize
   271  
   272  	regeneratedStreamSize4 := regeneratedSize - regeneratedStreamSize*3
   273  
   274  	outbuf = append(outbuf, make([]byte, regeneratedSize)...)
   275  
   276  	huffTable := r.huffmanTable
   277  	huffBits := uint32(r.huffmanTableBits)
   278  	huffMask := (uint32(1) << huffBits) - 1
   279  
   280  	for i := 0; i < regeneratedStreamSize; i++ {
   281  		use4 := i < regeneratedStreamSize4
   282  
   283  		fetchHuff := func(rbr *reverseBitReader) (uint16, error) {
   284  			if !rbr.fetch(uint8(huffBits)) {
   285  				return 0, rbr.makeError("literals Huffman stream out of bits")
   286  			}
   287  			idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
   288  			return huffTable[idx], nil
   289  		}
   290  
   291  		t1, err := fetchHuff(&rbr1)
   292  		if err != nil {
   293  			return nil, err
   294  		}
   295  
   296  		t2, err := fetchHuff(&rbr2)
   297  		if err != nil {
   298  			return nil, err
   299  		}
   300  
   301  		t3, err := fetchHuff(&rbr3)
   302  		if err != nil {
   303  			return nil, err
   304  		}
   305  
   306  		if use4 {
   307  			t4, err := fetchHuff(&rbr4)
   308  			if err != nil {
   309  				return nil, err
   310  			}
   311  			outbuf[out4] = byte(t4 >> 8)
   312  			out4++
   313  			rbr4.cnt -= uint32(t4 & 0xff)
   314  		}
   315  
   316  		outbuf[out1] = byte(t1 >> 8)
   317  		out1++
   318  		rbr1.cnt -= uint32(t1 & 0xff)
   319  
   320  		outbuf[out2] = byte(t2 >> 8)
   321  		out2++
   322  		rbr2.cnt -= uint32(t2 & 0xff)
   323  
   324  		outbuf[out3] = byte(t3 >> 8)
   325  		out3++
   326  		rbr3.cnt -= uint32(t3 & 0xff)
   327  	}
   328  
   329  	return outbuf, nil
   330  }