// Source: github.com/bir3/gocompiler@v0.3.205/src/cmd/gocmd/compress/huff0/decompress_amd64.go

//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc

// This file contains the specialisation of Decoder.Decompress4X
// and Decoder.Decompress1X that use an asm implementation of their main loops.
package huff0

import (
	"errors"
	"fmt"

	"github.com/bir3/gocompiler/src/cmd/gocmd/compress/internal/cpuinfo"
)

// decompress4x_main_loop_amd64 is an amd64 assembler implementation
// of the Decompress4X main loop, used when tablelog > 8.
//
//go:noescape
func decompress4x_main_loop_amd64(ctx *decompress4xContext)

// decompress4x_8b_main_loop_amd64 is an amd64 assembler implementation
// of the Decompress4X main loop, used when tablelog <= 8, which decodes
// 4 entries per loop.
//
//go:noescape
func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)

// fallback8BitSize is the size where using Go version is faster.
// Decompress4X compares it against cap(dst): when the table log is <= 8 and
// the destination capacity is below this value, decompress4X8bit is used
// instead of the assembler loop.
const fallback8BitSize = 800

// decompress4xContext carries the arguments and the result of the assembler
// main loops of Decompress4X.
//
// NOTE(review): the assembler routines presumably access these fields by
// offset, so field order and types should not change without updating the
// asm — confirm against the .s file.
type decompress4xContext struct {
	// pbr points at the four per-stream bit readers.
	pbr *[4]bitReaderShifted
	// peekBits is set by the caller to (64 - actualTableLog) & 63;
	// see: bitReaderShifted.peekBitsFast().
	peekBits uint8
	// out points at the first byte of the destination buffer.
	out *byte
	// dstEvery is the distance in bytes between the output regions of
	// consecutive streams, computed by the caller as (dstSize + 3) / 4.
	dstEvery int
	// tbl points at the first entry of the single-symbol decoding table.
	tbl *dEntrySingle
	// decoded is read back by the caller after the loop returns: the total
	// number of bytes decoded, of which each stream contributed decoded/4.
	decoded int
	// limit is set by the caller to &out[dstEvery-4]. Always stop decoding
	// when the first buffer gets here to avoid writing OOB on the last.
	limit *byte
}

// Decompress4X will decompress a 4X encoded stream.
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
45 func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { 46 if len(d.dt.single) == 0 { 47 return nil, errors.New("no table loaded") 48 } 49 if len(src) < 6+(4*1) { 50 return nil, errors.New("input too small") 51 } 52 53 use8BitTables := d.actualTableLog <= 8 54 if cap(dst) < fallback8BitSize && use8BitTables { 55 return d.decompress4X8bit(dst, src) 56 } 57 58 var br [4]bitReaderShifted 59 // Decode "jump table" 60 start := 6 61 for i := 0; i < 3; i++ { 62 length := int(src[i*2]) | (int(src[i*2+1]) << 8) 63 if start+length >= len(src) { 64 return nil, errors.New("truncated input (or invalid offset)") 65 } 66 err := br[i].init(src[start : start+length]) 67 if err != nil { 68 return nil, err 69 } 70 start += length 71 } 72 err := br[3].init(src[start:]) 73 if err != nil { 74 return nil, err 75 } 76 77 // destination, offset to match first output 78 dstSize := cap(dst) 79 dst = dst[:dstSize] 80 out := dst 81 dstEvery := (dstSize + 3) / 4 82 83 const tlSize = 1 << tableLogMax 84 const tlMask = tlSize - 1 85 single := d.dt.single[:tlSize] 86 87 var decoded int 88 89 if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) { 90 ctx := decompress4xContext{ 91 pbr: &br, 92 peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() 93 out: &out[0], 94 dstEvery: dstEvery, 95 tbl: &single[0], 96 limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last. 97 } 98 if use8BitTables { 99 decompress4x_8b_main_loop_amd64(&ctx) 100 } else { 101 decompress4x_main_loop_amd64(&ctx) 102 } 103 104 decoded = ctx.decoded 105 out = out[decoded/4:] 106 } 107 108 // Decode remaining. 
109 remainBytes := dstEvery - (decoded / 4) 110 for i := range br { 111 offset := dstEvery * i 112 endsAt := offset + remainBytes 113 if endsAt > len(out) { 114 endsAt = len(out) 115 } 116 br := &br[i] 117 bitsLeft := br.remaining() 118 for bitsLeft > 0 { 119 br.fill() 120 if offset >= endsAt { 121 return nil, errors.New("corruption detected: stream overrun 4") 122 } 123 124 // Read value and increment offset. 125 val := br.peekBitsFast(d.actualTableLog) 126 v := single[val&tlMask].entry 127 nBits := uint8(v) 128 br.advance(nBits) 129 bitsLeft -= uint(nBits) 130 out[offset] = uint8(v >> 8) 131 offset++ 132 } 133 if offset != endsAt { 134 return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) 135 } 136 decoded += offset - dstEvery*i 137 err = br.close() 138 if err != nil { 139 return nil, err 140 } 141 } 142 if dstSize != decoded { 143 return nil, errors.New("corruption detected: short output block") 144 } 145 return dst, nil 146 } 147 148 // decompress4x_main_loop_x86 is an x86 assembler implementation 149 // of Decompress1X when tablelog > 8. 150 // 151 //go:noescape 152 func decompress1x_main_loop_amd64(ctx *decompress1xContext) 153 154 // decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation 155 // of Decompress1X when tablelog > 8. 156 // 157 //go:noescape 158 func decompress1x_main_loop_bmi2(ctx *decompress1xContext) 159 160 type decompress1xContext struct { 161 pbr *bitReaderShifted 162 peekBits uint8 163 out *byte 164 outCap int 165 tbl *dEntrySingle 166 decoded int 167 } 168 169 // Error reported by asm implementations 170 const error_max_decoded_size_exeeded = -1 171 172 // Decompress1X will decompress a 1X encoded stream. 173 // The cap of the output buffer will be the maximum decompressed size. 174 // The length of the supplied input must match the end of a block exactly. 
175 func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { 176 if len(d.dt.single) == 0 { 177 return nil, errors.New("no table loaded") 178 } 179 var br bitReaderShifted 180 err := br.init(src) 181 if err != nil { 182 return dst, err 183 } 184 maxDecodedSize := cap(dst) 185 dst = dst[:maxDecodedSize] 186 187 const tlSize = 1 << tableLogMax 188 const tlMask = tlSize - 1 189 190 if maxDecodedSize >= 4 { 191 ctx := decompress1xContext{ 192 pbr: &br, 193 out: &dst[0], 194 outCap: maxDecodedSize, 195 peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() 196 tbl: &d.dt.single[0], 197 } 198 199 if cpuinfo.HasBMI2() { 200 decompress1x_main_loop_bmi2(&ctx) 201 } else { 202 decompress1x_main_loop_amd64(&ctx) 203 } 204 if ctx.decoded == error_max_decoded_size_exeeded { 205 return nil, ErrMaxDecodedSizeExceeded 206 } 207 208 dst = dst[:ctx.decoded] 209 } 210 211 // br < 8, so uint8 is fine 212 bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead 213 for bitsLeft > 0 { 214 br.fill() 215 if len(dst) >= maxDecodedSize { 216 br.close() 217 return nil, ErrMaxDecodedSizeExceeded 218 } 219 v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] 220 nBits := uint8(v.entry) 221 br.advance(nBits) 222 bitsLeft -= nBits 223 dst = append(dst, uint8(v.entry>>8)) 224 } 225 return dst, br.close() 226 }