github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/dict.go (about)

     1  package zstd
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  
     9  	"github.com/bir3/gocompiler/extra/compress/huff0"
    10  )
    11  
    12  type dict struct {
    13  	id uint32
    14  
    15  	litEnc              *huff0.Scratch
    16  	llDec, ofDec, mlDec sequenceDec
    17  	//llEnc, ofEnc, mlEnc []*fseEncoder
    18  	offsets [3]int
    19  	content []byte
    20  }
    21  
    22  const dictMagic = "\x37\xa4\x30\xec"
    23  
    24  // Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB.
    25  const dictMaxLength = 1 << 31
    26  
    27  // ID returns the dictionary id or 0 if d is nil.
    28  func (d *dict) ID() uint32 {
    29  	if d == nil {
    30  		return 0
    31  	}
    32  	return d.id
    33  }
    34  
    35  // ContentSize returns the dictionary content size or 0 if d is nil.
    36  func (d *dict) ContentSize() int {
    37  	if d == nil {
    38  		return 0
    39  	}
    40  	return len(d.content)
    41  }
    42  
    43  // Content returns the dictionary content.
    44  func (d *dict) Content() []byte {
    45  	if d == nil {
    46  		return nil
    47  	}
    48  	return d.content
    49  }
    50  
    51  // Offsets returns the initial offsets.
    52  func (d *dict) Offsets() [3]int {
    53  	if d == nil {
    54  		return [3]int{}
    55  	}
    56  	return d.offsets
    57  }
    58  
    59  // LitEncoder returns the literal encoder.
    60  func (d *dict) LitEncoder() *huff0.Scratch {
    61  	if d == nil {
    62  		return nil
    63  	}
    64  	return d.litEnc
    65  }
    66  
    67  // Load a dictionary as described in
    68  // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
    69  func loadDict(b []byte) (*dict, error) {
    70  	// Check static field size.
    71  	if len(b) <= 8+(3*4) {
    72  		return nil, io.ErrUnexpectedEOF
    73  	}
    74  	d := dict{
    75  		llDec: sequenceDec{fse: &fseDecoder{}},
    76  		ofDec: sequenceDec{fse: &fseDecoder{}},
    77  		mlDec: sequenceDec{fse: &fseDecoder{}},
    78  	}
    79  	if string(b[:4]) != dictMagic {
    80  		return nil, ErrMagicMismatch
    81  	}
    82  	d.id = binary.LittleEndian.Uint32(b[4:8])
    83  	if d.id == 0 {
    84  		return nil, errors.New("dictionaries cannot have ID 0")
    85  	}
    86  
    87  	// Read literal table
    88  	var err error
    89  	d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
    90  	if err != nil {
    91  		return nil, fmt.Errorf("loading literal table: %w", err)
    92  	}
    93  	d.litEnc.Reuse = huff0.ReusePolicyMust
    94  
    95  	br := byteReader{
    96  		b:   b,
    97  		off: 0,
    98  	}
    99  	readDec := func(i tableIndex, dec *fseDecoder) error {
   100  		if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil {
   101  			return err
   102  		}
   103  		if br.overread() {
   104  			return io.ErrUnexpectedEOF
   105  		}
   106  		err = dec.transform(symbolTableX[i])
   107  		if err != nil {
   108  			println("Transform table error:", err)
   109  			return err
   110  		}
   111  		if debugDecoder || debugEncoder {
   112  			println("Read table ok", "symbolLen:", dec.symbolLen)
   113  		}
   114  		// Set decoders as predefined so they aren't reused.
   115  		dec.preDefined = true
   116  		return nil
   117  	}
   118  
   119  	if err := readDec(tableOffsets, d.ofDec.fse); err != nil {
   120  		return nil, err
   121  	}
   122  	if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil {
   123  		return nil, err
   124  	}
   125  	if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil {
   126  		return nil, err
   127  	}
   128  	if br.remain() < 12 {
   129  		return nil, io.ErrUnexpectedEOF
   130  	}
   131  
   132  	d.offsets[0] = int(br.Uint32())
   133  	br.advance(4)
   134  	d.offsets[1] = int(br.Uint32())
   135  	br.advance(4)
   136  	d.offsets[2] = int(br.Uint32())
   137  	br.advance(4)
   138  	if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 {
   139  		return nil, errors.New("invalid offset in dictionary")
   140  	}
   141  	d.content = make([]byte, br.remain())
   142  	copy(d.content, br.unread())
   143  	if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) {
   144  		return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets)
   145  	}
   146  
   147  	return &d, nil
   148  }
   149  
   150  // InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
   151  func InspectDictionary(b []byte) (interface {
   152  	ID() uint32
   153  	ContentSize() int
   154  	Content() []byte
   155  	Offsets() [3]int
   156  	LitEncoder() *huff0.Scratch
   157  }, error) {
   158  	initPredefined()
   159  	d, err := loadDict(b)
   160  	return d, err
   161  }