github.com/bir3/gocompiler@v0.9.2202/extra/compress/zstd/dict.go (about) 1 package zstd 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "fmt" 7 "io" 8 9 "github.com/bir3/gocompiler/extra/compress/huff0" 10 ) 11 12 type dict struct { 13 id uint32 14 15 litEnc *huff0.Scratch 16 llDec, ofDec, mlDec sequenceDec 17 //llEnc, ofEnc, mlEnc []*fseEncoder 18 offsets [3]int 19 content []byte 20 } 21 22 const dictMagic = "\x37\xa4\x30\xec" 23 24 // Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB. 25 const dictMaxLength = 1 << 31 26 27 // ID returns the dictionary id or 0 if d is nil. 28 func (d *dict) ID() uint32 { 29 if d == nil { 30 return 0 31 } 32 return d.id 33 } 34 35 // ContentSize returns the dictionary content size or 0 if d is nil. 36 func (d *dict) ContentSize() int { 37 if d == nil { 38 return 0 39 } 40 return len(d.content) 41 } 42 43 // Content returns the dictionary content. 44 func (d *dict) Content() []byte { 45 if d == nil { 46 return nil 47 } 48 return d.content 49 } 50 51 // Offsets returns the initial offsets. 52 func (d *dict) Offsets() [3]int { 53 if d == nil { 54 return [3]int{} 55 } 56 return d.offsets 57 } 58 59 // LitEncoder returns the literal encoder. 60 func (d *dict) LitEncoder() *huff0.Scratch { 61 if d == nil { 62 return nil 63 } 64 return d.litEnc 65 } 66 67 // Load a dictionary as described in 68 // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format 69 func loadDict(b []byte) (*dict, error) { 70 // Check static field size. 71 if len(b) <= 8+(3*4) { 72 return nil, io.ErrUnexpectedEOF 73 } 74 d := dict{ 75 llDec: sequenceDec{fse: &fseDecoder{}}, 76 ofDec: sequenceDec{fse: &fseDecoder{}}, 77 mlDec: sequenceDec{fse: &fseDecoder{}}, 78 } 79 if string(b[:4]) != dictMagic { 80 return nil, ErrMagicMismatch 81 } 82 d.id = binary.LittleEndian.Uint32(b[4:8]) 83 if d.id == 0 { 84 return nil, errors.New("dictionaries cannot have ID 0") 85 } 86 87 // Read literal table 88 var err error 89 d.litEnc, b, err = huff0.ReadTable(b[8:], nil) 90 if err != nil { 91 return nil, fmt.Errorf("loading literal table: %w", err) 92 } 93 d.litEnc.Reuse = huff0.ReusePolicyMust 94 95 br := byteReader{ 96 b: b, 97 off: 0, 98 } 99 readDec := func(i tableIndex, dec *fseDecoder) error { 100 if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil { 101 return err 102 } 103 if br.overread() { 104 return io.ErrUnexpectedEOF 105 } 106 err = dec.transform(symbolTableX[i]) 107 if err != nil { 108 println("Transform table error:", err) 109 return err 110 } 111 if debugDecoder || debugEncoder { 112 println("Read table ok", "symbolLen:", dec.symbolLen) 113 } 114 // Set decoders as predefined so they aren't reused. 115 dec.preDefined = true 116 return nil 117 } 118 119 if err := readDec(tableOffsets, d.ofDec.fse); err != nil { 120 return nil, err 121 } 122 if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil { 123 return nil, err 124 } 125 if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil { 126 return nil, err 127 } 128 if br.remain() < 12 { 129 return nil, io.ErrUnexpectedEOF 130 } 131 132 d.offsets[0] = int(br.Uint32()) 133 br.advance(4) 134 d.offsets[1] = int(br.Uint32()) 135 br.advance(4) 136 d.offsets[2] = int(br.Uint32()) 137 br.advance(4) 138 if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 { 139 return nil, errors.New("invalid offset in dictionary") 140 } 141 d.content = make([]byte, br.remain()) 142 copy(d.content, br.unread()) 143 if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) { 144 return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets) 145 } 146 147 return &d, nil 148 } 149 150 // InspectDictionary loads a zstd dictionary and provides functions to inspect the content. 151 func InspectDictionary(b []byte) (interface { 152 ID() uint32 153 ContentSize() int 154 Content() []byte 155 Offsets() [3]int 156 LitEncoder() *huff0.Scratch 157 }, error) { 158 initPredefined() 159 d, err := loadDict(b) 160 return d, err 161 }