github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/zstd/literals.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package zstd
     6  
     7  import (
     8  	"encoding/binary"
     9  )
    10  
    11  // readLiterals reads and decompresses the literals from data at off.
    12  // The literals are appended to outbuf, which is returned.
    13  // Also returns the new input offset. RFC 3.1.1.3.1.
    14  func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error) {
    15  	if off >= len(data) {
    16  		return 0, nil, r.makeEOFError(off)
    17  	}
    18  
    19  	// Literals section header. RFC 3.1.1.3.1.1.
    20  	hdr := data[off]
    21  	off++
    22  
    23  	if (hdr&3) == 0 || (hdr&3) == 1 {
    24  		return r.readRawRLELiterals(data, off, hdr, outbuf)
    25  	} else {
    26  		return r.readHuffLiterals(data, off, hdr, outbuf)
    27  	}
    28  }
    29  
    30  // readRawRLELiterals reads and decompresses a Raw_Literals_Block or
    31  // a RLE_Literals_Block. RFC 3.1.1.3.1.1.
    32  func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
    33  	raw := (hdr & 3) == 0
    34  
    35  	var regeneratedSize int
    36  	switch (hdr >> 2) & 3 {
    37  	case 0, 2:
    38  		regeneratedSize = int(hdr >> 3)
    39  	case 1:
    40  		if off >= len(data) {
    41  			return 0, nil, r.makeEOFError(off)
    42  		}
    43  		regeneratedSize = int(hdr>>4) + (int(data[off]) << 4)
    44  		off++
    45  	case 3:
    46  		if off+1 >= len(data) {
    47  			return 0, nil, r.makeEOFError(off)
    48  		}
    49  		regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + (int(data[off+1]) << 12)
    50  		off += 2
    51  	}
    52  
    53  	// We are going to use the entire literal block in the output.
    54  	// The maximum size of one decompressed block is 128K,
    55  	// so we can't have more literals than that.
    56  	if regeneratedSize > 128<<10 {
    57  		return 0, nil, r.makeError(off, "literal size too large")
    58  	}
    59  
    60  	if raw {
    61  		// RFC 3.1.1.3.1.2.
    62  		if off+regeneratedSize > len(data) {
    63  			return 0, nil, r.makeError(off, "raw literal size too large")
    64  		}
    65  		outbuf = append(outbuf, data[off:off+regeneratedSize]...)
    66  		off += regeneratedSize
    67  	} else {
    68  		// RFC 3.1.1.3.1.3.
    69  		if off >= len(data) {
    70  			return 0, nil, r.makeError(off, "RLE literal missing")
    71  		}
    72  		rle := data[off]
    73  		off++
    74  		for i := 0; i < regeneratedSize; i++ {
    75  			outbuf = append(outbuf, rle)
    76  		}
    77  	}
    78  
    79  	return off, outbuf, nil
    80  }
    81  
    82  // readHuffLiterals reads and decompresses a Compressed_Literals_Block or
    83  // a Treeless_Literals_Block. RFC 3.1.1.3.1.4.
    84  func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
    85  	var (
    86  		regeneratedSize int
    87  		compressedSize  int
    88  		streams         int
    89  	)
    90  	switch (hdr >> 2) & 3 {
    91  	case 0, 1:
    92  		if off+1 >= len(data) {
    93  			return 0, nil, r.makeEOFError(off)
    94  		}
    95  		regeneratedSize = (int(hdr) >> 4) | ((int(data[off]) & 0x3f) << 4)
    96  		compressedSize = (int(data[off]) >> 6) | (int(data[off+1]) << 2)
    97  		off += 2
    98  		if ((hdr >> 2) & 3) == 0 {
    99  			streams = 1
   100  		} else {
   101  			streams = 4
   102  		}
   103  	case 2:
   104  		if off+2 >= len(data) {
   105  			return 0, nil, r.makeEOFError(off)
   106  		}
   107  		regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 3) << 12)
   108  		compressedSize = (int(data[off+1]) >> 2) | (int(data[off+2]) << 6)
   109  		off += 3
   110  		streams = 4
   111  	case 3:
   112  		if off+3 >= len(data) {
   113  			return 0, nil, r.makeEOFError(off)
   114  		}
   115  		regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 0x3f) << 12)
   116  		compressedSize = (int(data[off+1]) >> 6) | (int(data[off+2]) << 2) | (int(data[off+3]) << 10)
   117  		off += 4
   118  		streams = 4
   119  	}
   120  
   121  	// We are going to use the entire literal block in the output.
   122  	// The maximum size of one decompressed block is 128K,
   123  	// so we can't have more literals than that.
   124  	if regeneratedSize > 128<<10 {
   125  		return 0, nil, r.makeError(off, "literal size too large")
   126  	}
   127  
   128  	roff := off + compressedSize
   129  	if roff > len(data) || roff < 0 {
   130  		return 0, nil, r.makeEOFError(off)
   131  	}
   132  
   133  	totalStreamsSize := compressedSize
   134  	if (hdr & 3) == 2 {
   135  		// Compressed_Literals_Block.
   136  		// Read new huffman tree.
   137  
   138  		if len(r.huffmanTable) < 1<<maxHuffmanBits {
   139  			r.huffmanTable = make([]uint16, 1<<maxHuffmanBits)
   140  		}
   141  
   142  		huffmanTableBits, hoff, err := r.readHuff(data, off, r.huffmanTable)
   143  		if err != nil {
   144  			return 0, nil, err
   145  		}
   146  		r.huffmanTableBits = huffmanTableBits
   147  
   148  		if totalStreamsSize < hoff-off {
   149  			return 0, nil, r.makeError(off, "Huffman table too big")
   150  		}
   151  		totalStreamsSize -= hoff - off
   152  		off = hoff
   153  	} else {
   154  		// Treeless_Literals_Block
   155  		// Reuse previous Huffman tree.
   156  		if r.huffmanTableBits == 0 {
   157  			return 0, nil, r.makeError(off, "missing literals Huffman tree")
   158  		}
   159  	}
   160  
   161  	// Decompress compressedSize bytes of data at off using the
   162  	// Huffman tree.
   163  
   164  	var err error
   165  	if streams == 1 {
   166  		outbuf, err = r.readLiteralsOneStream(data, off, totalStreamsSize, regeneratedSize, outbuf)
   167  	} else {
   168  		outbuf, err = r.readLiteralsFourStreams(data, off, totalStreamsSize, regeneratedSize, outbuf)
   169  	}
   170  
   171  	if err != nil {
   172  		return 0, nil, err
   173  	}
   174  
   175  	return roff, outbuf, nil
   176  }
   177  
   178  // readLiteralsOneStream reads a single stream of compressed literals.
   179  func (r *Reader) readLiteralsOneStream(data block, off, compressedSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
   180  	// We let the reverse bit reader read earlier bytes,
   181  	// because the Huffman table ignores bits that it doesn't need.
   182  	rbr, err := r.makeReverseBitReader(data, off+compressedSize-1, off-2)
   183  	if err != nil {
   184  		return nil, err
   185  	}
   186  
   187  	huffTable := r.huffmanTable
   188  	huffBits := uint32(r.huffmanTableBits)
   189  	huffMask := (uint32(1) << huffBits) - 1
   190  
   191  	for i := 0; i < regeneratedSize; i++ {
   192  		if !rbr.fetch(uint8(huffBits)) {
   193  			return nil, rbr.makeError("literals Huffman stream out of bits")
   194  		}
   195  
   196  		var t uint16
   197  		idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
   198  		t = huffTable[idx]
   199  		outbuf = append(outbuf, byte(t>>8))
   200  		rbr.cnt -= uint32(t & 0xff)
   201  	}
   202  
   203  	return outbuf, nil
   204  }
   205  
   206  // readLiteralsFourStreams reads four interleaved streams of
   207  // compressed literals.
   208  func (r *Reader) readLiteralsFourStreams(data block, off, totalStreamsSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
   209  	// Read the jump table to find out where the streams are.
   210  	// RFC 3.1.1.3.1.6.
   211  	if off+5 >= len(data) {
   212  		return nil, r.makeEOFError(off)
   213  	}
   214  	if totalStreamsSize < 6 {
   215  		return nil, r.makeError(off, "total streams size too small for jump table")
   216  	}
   217  	// RFC 3.1.1.3.1.6.
   218  	// "The decompressed size of each stream is equal to (Regenerated_Size+3)/4,
   219  	// except for the last stream, which may be up to 3 bytes smaller,
   220  	// to reach a total decompressed size as specified in Regenerated_Size."
   221  	regeneratedStreamSize := (regeneratedSize + 3) / 4
   222  	if regeneratedSize < regeneratedStreamSize*3 {
   223  		return nil, r.makeError(off, "regenerated size too small to decode streams")
   224  	}
   225  
   226  	streamSize1 := binary.LittleEndian.Uint16(data[off:])
   227  	streamSize2 := binary.LittleEndian.Uint16(data[off+2:])
   228  	streamSize3 := binary.LittleEndian.Uint16(data[off+4:])
   229  	off += 6
   230  
   231  	tot := uint64(streamSize1) + uint64(streamSize2) + uint64(streamSize3)
   232  	if tot > uint64(totalStreamsSize)-6 {
   233  		return nil, r.makeEOFError(off)
   234  	}
   235  	streamSize4 := uint32(totalStreamsSize) - 6 - uint32(tot)
   236  
   237  	off--
   238  	off1 := off + int(streamSize1)
   239  	start1 := off + 1
   240  
   241  	off2 := off1 + int(streamSize2)
   242  	start2 := off1 + 1
   243  
   244  	off3 := off2 + int(streamSize3)
   245  	start3 := off2 + 1
   246  
   247  	off4 := off3 + int(streamSize4)
   248  	start4 := off3 + 1
   249  
   250  	// We let the reverse bit readers read earlier bytes,
   251  	// because the Huffman tables ignore bits that they don't need.
   252  
   253  	rbr1, err := r.makeReverseBitReader(data, off1, start1-2)
   254  	if err != nil {
   255  		return nil, err
   256  	}
   257  
   258  	rbr2, err := r.makeReverseBitReader(data, off2, start2-2)
   259  	if err != nil {
   260  		return nil, err
   261  	}
   262  
   263  	rbr3, err := r.makeReverseBitReader(data, off3, start3-2)
   264  	if err != nil {
   265  		return nil, err
   266  	}
   267  
   268  	rbr4, err := r.makeReverseBitReader(data, off4, start4-2)
   269  	if err != nil {
   270  		return nil, err
   271  	}
   272  
   273  	out1 := len(outbuf)
   274  	out2 := out1 + regeneratedStreamSize
   275  	out3 := out2 + regeneratedStreamSize
   276  	out4 := out3 + regeneratedStreamSize
   277  
   278  	regeneratedStreamSize4 := regeneratedSize - regeneratedStreamSize*3
   279  
   280  	outbuf = append(outbuf, make([]byte, regeneratedSize)...)
   281  
   282  	huffTable := r.huffmanTable
   283  	huffBits := uint32(r.huffmanTableBits)
   284  	huffMask := (uint32(1) << huffBits) - 1
   285  
   286  	for i := 0; i < regeneratedStreamSize; i++ {
   287  		use4 := i < regeneratedStreamSize4
   288  
   289  		fetchHuff := func(rbr *reverseBitReader) (uint16, error) {
   290  			if !rbr.fetch(uint8(huffBits)) {
   291  				return 0, rbr.makeError("literals Huffman stream out of bits")
   292  			}
   293  			idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
   294  			return huffTable[idx], nil
   295  		}
   296  
   297  		t1, err := fetchHuff(&rbr1)
   298  		if err != nil {
   299  			return nil, err
   300  		}
   301  
   302  		t2, err := fetchHuff(&rbr2)
   303  		if err != nil {
   304  			return nil, err
   305  		}
   306  
   307  		t3, err := fetchHuff(&rbr3)
   308  		if err != nil {
   309  			return nil, err
   310  		}
   311  
   312  		if use4 {
   313  			t4, err := fetchHuff(&rbr4)
   314  			if err != nil {
   315  				return nil, err
   316  			}
   317  			outbuf[out4] = byte(t4 >> 8)
   318  			out4++
   319  			rbr4.cnt -= uint32(t4 & 0xff)
   320  		}
   321  
   322  		outbuf[out1] = byte(t1 >> 8)
   323  		out1++
   324  		rbr1.cnt -= uint32(t1 & 0xff)
   325  
   326  		outbuf[out2] = byte(t2 >> 8)
   327  		out2++
   328  		rbr2.cnt -= uint32(t2 & 0xff)
   329  
   330  		outbuf[out3] = byte(t3 >> 8)
   331  		out3++
   332  		rbr3.cnt -= uint32(t3 & 0xff)
   333  	}
   334  
   335  	return outbuf, nil
   336  }