github.com/bir3/gocompiler@v0.9.2202/extra/compress/huff0/decompress.go (about)

     1  package huff0
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"sync"
     8  
     9  	"github.com/bir3/gocompiler/extra/compress/fse"
    10  )
    11  
    12  type dTable struct {
    13  	single []dEntrySingle
    14  }
    15  
    16  // single-symbols decoding
    17  type dEntrySingle struct {
    18  	entry uint16
    19  }
    20  
    21  // Uses special code for all tables that are < 8 bits.
    22  const use8BitTables = true
    23  
    24  // ReadTable will read a table from the input.
    25  // The size of the input may be larger than the table definition.
    26  // Any content remaining after the table definition will be returned.
    27  // If no Scratch is provided a new one is allocated.
    28  // The returned Scratch can be used for encoding or decoding input using this table.
    29  func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
    30  	s, err = s.prepare(nil)
    31  	if err != nil {
    32  		return s, nil, err
    33  	}
    34  	if len(in) <= 1 {
    35  		return s, nil, errors.New("input too small for table")
    36  	}
    37  	iSize := in[0]
    38  	in = in[1:]
    39  	if iSize >= 128 {
    40  		// Uncompressed
    41  		oSize := iSize - 127
    42  		iSize = (oSize + 1) / 2
    43  		if int(iSize) > len(in) {
    44  			return s, nil, errors.New("input too small for table")
    45  		}
    46  		for n := uint8(0); n < oSize; n += 2 {
    47  			v := in[n/2]
    48  			s.huffWeight[n] = v >> 4
    49  			s.huffWeight[n+1] = v & 15
    50  		}
    51  		s.symbolLen = uint16(oSize)
    52  		in = in[iSize:]
    53  	} else {
    54  		if len(in) < int(iSize) {
    55  			return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
    56  		}
    57  		// FSE compressed weights
    58  		s.fse.DecompressLimit = 255
    59  		hw := s.huffWeight[:]
    60  		s.fse.Out = hw
    61  		b, err := fse.Decompress(in[:iSize], s.fse)
    62  		s.fse.Out = nil
    63  		if err != nil {
    64  			return s, nil, fmt.Errorf("fse decompress returned: %w", err)
    65  		}
    66  		if len(b) > 255 {
    67  			return s, nil, errors.New("corrupt input: output table too large")
    68  		}
    69  		s.symbolLen = uint16(len(b))
    70  		in = in[iSize:]
    71  	}
    72  
    73  	// collect weight stats
    74  	var rankStats [16]uint32
    75  	weightTotal := uint32(0)
    76  	for _, v := range s.huffWeight[:s.symbolLen] {
    77  		if v > tableLogMax {
    78  			return s, nil, errors.New("corrupt input: weight too large")
    79  		}
    80  		v2 := v & 15
    81  		rankStats[v2]++
    82  		// (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
    83  		weightTotal += (1 << v2) >> 1
    84  	}
    85  	if weightTotal == 0 {
    86  		return s, nil, errors.New("corrupt input: weights zero")
    87  	}
    88  
    89  	// get last non-null symbol weight (implied, total must be 2^n)
    90  	{
    91  		tableLog := highBit32(weightTotal) + 1
    92  		if tableLog > tableLogMax {
    93  			return s, nil, errors.New("corrupt input: tableLog too big")
    94  		}
    95  		s.actualTableLog = uint8(tableLog)
    96  		// determine last weight
    97  		{
    98  			total := uint32(1) << tableLog
    99  			rest := total - weightTotal
   100  			verif := uint32(1) << highBit32(rest)
   101  			lastWeight := highBit32(rest) + 1
   102  			if verif != rest {
   103  				// last value must be a clean power of 2
   104  				return s, nil, errors.New("corrupt input: last value not power of two")
   105  			}
   106  			s.huffWeight[s.symbolLen] = uint8(lastWeight)
   107  			s.symbolLen++
   108  			rankStats[lastWeight]++
   109  		}
   110  	}
   111  
   112  	if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
   113  		// by construction : at least 2 elts of rank 1, must be even
   114  		return s, nil, errors.New("corrupt input: min elt size, even check failed ")
   115  	}
   116  
   117  	// TODO: Choose between single/double symbol decoding
   118  
   119  	// Calculate starting value for each rank
   120  	{
   121  		var nextRankStart uint32
   122  		for n := uint8(1); n < s.actualTableLog+1; n++ {
   123  			current := nextRankStart
   124  			nextRankStart += rankStats[n] << (n - 1)
   125  			rankStats[n] = current
   126  		}
   127  	}
   128  
   129  	// fill DTable (always full size)
   130  	tSize := 1 << tableLogMax
   131  	if len(s.dt.single) != tSize {
   132  		s.dt.single = make([]dEntrySingle, tSize)
   133  	}
   134  	cTable := s.prevTable
   135  	if cap(cTable) < maxSymbolValue+1 {
   136  		cTable = make([]cTableEntry, 0, maxSymbolValue+1)
   137  	}
   138  	cTable = cTable[:maxSymbolValue+1]
   139  	s.prevTable = cTable[:s.symbolLen]
   140  	s.prevTableLog = s.actualTableLog
   141  
   142  	for n, w := range s.huffWeight[:s.symbolLen] {
   143  		if w == 0 {
   144  			cTable[n] = cTableEntry{
   145  				val:   0,
   146  				nBits: 0,
   147  			}
   148  			continue
   149  		}
   150  		length := (uint32(1) << w) >> 1
   151  		d := dEntrySingle{
   152  			entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
   153  		}
   154  
   155  		rank := &rankStats[w]
   156  		cTable[n] = cTableEntry{
   157  			val:   uint16(*rank >> (w - 1)),
   158  			nBits: uint8(d.entry),
   159  		}
   160  
   161  		single := s.dt.single[*rank : *rank+length]
   162  		for i := range single {
   163  			single[i] = d
   164  		}
   165  		*rank += length
   166  	}
   167  
   168  	return s, in, nil
   169  }
   170  
   171  // Decompress1X will decompress a 1X encoded stream.
   172  // The length of the supplied input must match the end of a block exactly.
   173  // Before this is called, the table must be initialized with ReadTable unless
   174  // the encoder re-used the table.
   175  // deprecated: Use the stateless Decoder() to get a concurrent version.
   176  func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
   177  	if cap(s.Out) < s.MaxDecodedSize {
   178  		s.Out = make([]byte, s.MaxDecodedSize)
   179  	}
   180  	s.Out = s.Out[:0:s.MaxDecodedSize]
   181  	s.Out, err = s.Decoder().Decompress1X(s.Out, in)
   182  	return s.Out, err
   183  }
   184  
   185  // Decompress4X will decompress a 4X encoded stream.
   186  // Before this is called, the table must be initialized with ReadTable unless
   187  // the encoder re-used the table.
   188  // The length of the supplied input must match the end of a block exactly.
   189  // The destination size of the uncompressed data must be known and provided.
   190  // deprecated: Use the stateless Decoder() to get a concurrent version.
   191  func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
   192  	if dstSize > s.MaxDecodedSize {
   193  		return nil, ErrMaxDecodedSizeExceeded
   194  	}
   195  	if cap(s.Out) < dstSize {
   196  		s.Out = make([]byte, s.MaxDecodedSize)
   197  	}
   198  	s.Out = s.Out[:0:dstSize]
   199  	s.Out, err = s.Decoder().Decompress4X(s.Out, in)
   200  	return s.Out, err
   201  }
   202  
   203  // Decoder will return a stateless decoder that can be used by multiple
   204  // decompressors concurrently.
   205  // Before this is called, the table must be initialized with ReadTable.
   206  // The Decoder is still linked to the scratch buffer so that cannot be reused.
   207  // However, it is safe to discard the scratch.
   208  func (s *Scratch) Decoder() *Decoder {
   209  	return &Decoder{
   210  		dt:             s.dt,
   211  		actualTableLog: s.actualTableLog,
   212  		bufs:           &s.decPool,
   213  	}
   214  }
   215  
   216  // Decoder provides stateless decoding.
   217  type Decoder struct {
   218  	dt             dTable
   219  	actualTableLog uint8
   220  	bufs           *sync.Pool
   221  }
   222  
   223  func (d *Decoder) buffer() *[4][256]byte {
   224  	buf, ok := d.bufs.Get().(*[4][256]byte)
   225  	if ok {
   226  		return buf
   227  	}
   228  	return &[4][256]byte{}
   229  }
   230  
   231  // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
   232  // The cap of the output buffer will be the maximum decompressed size.
   233  // The length of the supplied input must match the end of a block exactly.
   234  func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
   235  	if d.actualTableLog == 8 {
   236  		return d.decompress1X8BitExactly(dst, src)
   237  	}
   238  	var br bitReaderBytes
   239  	err := br.init(src)
   240  	if err != nil {
   241  		return dst, err
   242  	}
   243  	maxDecodedSize := cap(dst)
   244  	dst = dst[:0]
   245  
   246  	// Avoid bounds check by always having full sized table.
   247  	dt := d.dt.single[:256]
   248  
   249  	// Use temp table to avoid bound checks/append penalty.
   250  	bufs := d.buffer()
   251  	buf := &bufs[0]
   252  	var off uint8
   253  
   254  	switch d.actualTableLog {
   255  	case 8:
   256  		const shift = 8 - 8
   257  		for br.off >= 4 {
   258  			br.fillFast()
   259  			v := dt[uint8(br.value>>(56+shift))]
   260  			br.advance(uint8(v.entry))
   261  			buf[off+0] = uint8(v.entry >> 8)
   262  
   263  			v = dt[uint8(br.value>>(56+shift))]
   264  			br.advance(uint8(v.entry))
   265  			buf[off+1] = uint8(v.entry >> 8)
   266  
   267  			v = dt[uint8(br.value>>(56+shift))]
   268  			br.advance(uint8(v.entry))
   269  			buf[off+2] = uint8(v.entry >> 8)
   270  
   271  			v = dt[uint8(br.value>>(56+shift))]
   272  			br.advance(uint8(v.entry))
   273  			buf[off+3] = uint8(v.entry >> 8)
   274  
   275  			off += 4
   276  			if off == 0 {
   277  				if len(dst)+256 > maxDecodedSize {
   278  					br.close()
   279  					d.bufs.Put(bufs)
   280  					return nil, ErrMaxDecodedSizeExceeded
   281  				}
   282  				dst = append(dst, buf[:]...)
   283  			}
   284  		}
   285  	case 7:
   286  		const shift = 8 - 7
   287  		for br.off >= 4 {
   288  			br.fillFast()
   289  			v := dt[uint8(br.value>>(56+shift))]
   290  			br.advance(uint8(v.entry))
   291  			buf[off+0] = uint8(v.entry >> 8)
   292  
   293  			v = dt[uint8(br.value>>(56+shift))]
   294  			br.advance(uint8(v.entry))
   295  			buf[off+1] = uint8(v.entry >> 8)
   296  
   297  			v = dt[uint8(br.value>>(56+shift))]
   298  			br.advance(uint8(v.entry))
   299  			buf[off+2] = uint8(v.entry >> 8)
   300  
   301  			v = dt[uint8(br.value>>(56+shift))]
   302  			br.advance(uint8(v.entry))
   303  			buf[off+3] = uint8(v.entry >> 8)
   304  
   305  			off += 4
   306  			if off == 0 {
   307  				if len(dst)+256 > maxDecodedSize {
   308  					br.close()
   309  					d.bufs.Put(bufs)
   310  					return nil, ErrMaxDecodedSizeExceeded
   311  				}
   312  				dst = append(dst, buf[:]...)
   313  			}
   314  		}
   315  	case 6:
   316  		const shift = 8 - 6
   317  		for br.off >= 4 {
   318  			br.fillFast()
   319  			v := dt[uint8(br.value>>(56+shift))]
   320  			br.advance(uint8(v.entry))
   321  			buf[off+0] = uint8(v.entry >> 8)
   322  
   323  			v = dt[uint8(br.value>>(56+shift))]
   324  			br.advance(uint8(v.entry))
   325  			buf[off+1] = uint8(v.entry >> 8)
   326  
   327  			v = dt[uint8(br.value>>(56+shift))]
   328  			br.advance(uint8(v.entry))
   329  			buf[off+2] = uint8(v.entry >> 8)
   330  
   331  			v = dt[uint8(br.value>>(56+shift))]
   332  			br.advance(uint8(v.entry))
   333  			buf[off+3] = uint8(v.entry >> 8)
   334  
   335  			off += 4
   336  			if off == 0 {
   337  				if len(dst)+256 > maxDecodedSize {
   338  					d.bufs.Put(bufs)
   339  					br.close()
   340  					return nil, ErrMaxDecodedSizeExceeded
   341  				}
   342  				dst = append(dst, buf[:]...)
   343  			}
   344  		}
   345  	case 5:
   346  		const shift = 8 - 5
   347  		for br.off >= 4 {
   348  			br.fillFast()
   349  			v := dt[uint8(br.value>>(56+shift))]
   350  			br.advance(uint8(v.entry))
   351  			buf[off+0] = uint8(v.entry >> 8)
   352  
   353  			v = dt[uint8(br.value>>(56+shift))]
   354  			br.advance(uint8(v.entry))
   355  			buf[off+1] = uint8(v.entry >> 8)
   356  
   357  			v = dt[uint8(br.value>>(56+shift))]
   358  			br.advance(uint8(v.entry))
   359  			buf[off+2] = uint8(v.entry >> 8)
   360  
   361  			v = dt[uint8(br.value>>(56+shift))]
   362  			br.advance(uint8(v.entry))
   363  			buf[off+3] = uint8(v.entry >> 8)
   364  
   365  			off += 4
   366  			if off == 0 {
   367  				if len(dst)+256 > maxDecodedSize {
   368  					d.bufs.Put(bufs)
   369  					br.close()
   370  					return nil, ErrMaxDecodedSizeExceeded
   371  				}
   372  				dst = append(dst, buf[:]...)
   373  			}
   374  		}
   375  	case 4:
   376  		const shift = 8 - 4
   377  		for br.off >= 4 {
   378  			br.fillFast()
   379  			v := dt[uint8(br.value>>(56+shift))]
   380  			br.advance(uint8(v.entry))
   381  			buf[off+0] = uint8(v.entry >> 8)
   382  
   383  			v = dt[uint8(br.value>>(56+shift))]
   384  			br.advance(uint8(v.entry))
   385  			buf[off+1] = uint8(v.entry >> 8)
   386  
   387  			v = dt[uint8(br.value>>(56+shift))]
   388  			br.advance(uint8(v.entry))
   389  			buf[off+2] = uint8(v.entry >> 8)
   390  
   391  			v = dt[uint8(br.value>>(56+shift))]
   392  			br.advance(uint8(v.entry))
   393  			buf[off+3] = uint8(v.entry >> 8)
   394  
   395  			off += 4
   396  			if off == 0 {
   397  				if len(dst)+256 > maxDecodedSize {
   398  					d.bufs.Put(bufs)
   399  					br.close()
   400  					return nil, ErrMaxDecodedSizeExceeded
   401  				}
   402  				dst = append(dst, buf[:]...)
   403  			}
   404  		}
   405  	case 3:
   406  		const shift = 8 - 3
   407  		for br.off >= 4 {
   408  			br.fillFast()
   409  			v := dt[uint8(br.value>>(56+shift))]
   410  			br.advance(uint8(v.entry))
   411  			buf[off+0] = uint8(v.entry >> 8)
   412  
   413  			v = dt[uint8(br.value>>(56+shift))]
   414  			br.advance(uint8(v.entry))
   415  			buf[off+1] = uint8(v.entry >> 8)
   416  
   417  			v = dt[uint8(br.value>>(56+shift))]
   418  			br.advance(uint8(v.entry))
   419  			buf[off+2] = uint8(v.entry >> 8)
   420  
   421  			v = dt[uint8(br.value>>(56+shift))]
   422  			br.advance(uint8(v.entry))
   423  			buf[off+3] = uint8(v.entry >> 8)
   424  
   425  			off += 4
   426  			if off == 0 {
   427  				if len(dst)+256 > maxDecodedSize {
   428  					d.bufs.Put(bufs)
   429  					br.close()
   430  					return nil, ErrMaxDecodedSizeExceeded
   431  				}
   432  				dst = append(dst, buf[:]...)
   433  			}
   434  		}
   435  	case 2:
   436  		const shift = 8 - 2
   437  		for br.off >= 4 {
   438  			br.fillFast()
   439  			v := dt[uint8(br.value>>(56+shift))]
   440  			br.advance(uint8(v.entry))
   441  			buf[off+0] = uint8(v.entry >> 8)
   442  
   443  			v = dt[uint8(br.value>>(56+shift))]
   444  			br.advance(uint8(v.entry))
   445  			buf[off+1] = uint8(v.entry >> 8)
   446  
   447  			v = dt[uint8(br.value>>(56+shift))]
   448  			br.advance(uint8(v.entry))
   449  			buf[off+2] = uint8(v.entry >> 8)
   450  
   451  			v = dt[uint8(br.value>>(56+shift))]
   452  			br.advance(uint8(v.entry))
   453  			buf[off+3] = uint8(v.entry >> 8)
   454  
   455  			off += 4
   456  			if off == 0 {
   457  				if len(dst)+256 > maxDecodedSize {
   458  					d.bufs.Put(bufs)
   459  					br.close()
   460  					return nil, ErrMaxDecodedSizeExceeded
   461  				}
   462  				dst = append(dst, buf[:]...)
   463  			}
   464  		}
   465  	case 1:
   466  		const shift = 8 - 1
   467  		for br.off >= 4 {
   468  			br.fillFast()
   469  			v := dt[uint8(br.value>>(56+shift))]
   470  			br.advance(uint8(v.entry))
   471  			buf[off+0] = uint8(v.entry >> 8)
   472  
   473  			v = dt[uint8(br.value>>(56+shift))]
   474  			br.advance(uint8(v.entry))
   475  			buf[off+1] = uint8(v.entry >> 8)
   476  
   477  			v = dt[uint8(br.value>>(56+shift))]
   478  			br.advance(uint8(v.entry))
   479  			buf[off+2] = uint8(v.entry >> 8)
   480  
   481  			v = dt[uint8(br.value>>(56+shift))]
   482  			br.advance(uint8(v.entry))
   483  			buf[off+3] = uint8(v.entry >> 8)
   484  
   485  			off += 4
   486  			if off == 0 {
   487  				if len(dst)+256 > maxDecodedSize {
   488  					d.bufs.Put(bufs)
   489  					br.close()
   490  					return nil, ErrMaxDecodedSizeExceeded
   491  				}
   492  				dst = append(dst, buf[:]...)
   493  			}
   494  		}
   495  	default:
   496  		d.bufs.Put(bufs)
   497  		return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
   498  	}
   499  
   500  	if len(dst)+int(off) > maxDecodedSize {
   501  		d.bufs.Put(bufs)
   502  		br.close()
   503  		return nil, ErrMaxDecodedSizeExceeded
   504  	}
   505  	dst = append(dst, buf[:off]...)
   506  
   507  	// br < 4, so uint8 is fine
   508  	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
   509  	shift := (8 - d.actualTableLog) & 7
   510  
   511  	for bitsLeft > 0 {
   512  		if br.bitsRead >= 64-8 {
   513  			for br.off > 0 {
   514  				br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
   515  				br.bitsRead -= 8
   516  				br.off--
   517  			}
   518  		}
   519  		if len(dst) >= maxDecodedSize {
   520  			br.close()
   521  			d.bufs.Put(bufs)
   522  			return nil, ErrMaxDecodedSizeExceeded
   523  		}
   524  		v := dt[br.peekByteFast()>>shift]
   525  		nBits := uint8(v.entry)
   526  		br.advance(nBits)
   527  		bitsLeft -= int8(nBits)
   528  		dst = append(dst, uint8(v.entry>>8))
   529  	}
   530  	d.bufs.Put(bufs)
   531  	return dst, br.close()
   532  }
   533  
   534  // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
   535  // The cap of the output buffer will be the maximum decompressed size.
   536  // The length of the supplied input must match the end of a block exactly.
   537  func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
   538  	var br bitReaderBytes
   539  	err := br.init(src)
   540  	if err != nil {
   541  		return dst, err
   542  	}
   543  	maxDecodedSize := cap(dst)
   544  	dst = dst[:0]
   545  
   546  	// Avoid bounds check by always having full sized table.
   547  	dt := d.dt.single[:256]
   548  
   549  	// Use temp table to avoid bound checks/append penalty.
   550  	bufs := d.buffer()
   551  	buf := &bufs[0]
   552  	var off uint8
   553  
   554  	const shift = 56
   555  
   556  	//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
   557  	for br.off >= 4 {
   558  		br.fillFast()
   559  		v := dt[uint8(br.value>>shift)]
   560  		br.advance(uint8(v.entry))
   561  		buf[off+0] = uint8(v.entry >> 8)
   562  
   563  		v = dt[uint8(br.value>>shift)]
   564  		br.advance(uint8(v.entry))
   565  		buf[off+1] = uint8(v.entry >> 8)
   566  
   567  		v = dt[uint8(br.value>>shift)]
   568  		br.advance(uint8(v.entry))
   569  		buf[off+2] = uint8(v.entry >> 8)
   570  
   571  		v = dt[uint8(br.value>>shift)]
   572  		br.advance(uint8(v.entry))
   573  		buf[off+3] = uint8(v.entry >> 8)
   574  
   575  		off += 4
   576  		if off == 0 {
   577  			if len(dst)+256 > maxDecodedSize {
   578  				d.bufs.Put(bufs)
   579  				br.close()
   580  				return nil, ErrMaxDecodedSizeExceeded
   581  			}
   582  			dst = append(dst, buf[:]...)
   583  		}
   584  	}
   585  
   586  	if len(dst)+int(off) > maxDecodedSize {
   587  		d.bufs.Put(bufs)
   588  		br.close()
   589  		return nil, ErrMaxDecodedSizeExceeded
   590  	}
   591  	dst = append(dst, buf[:off]...)
   592  
   593  	// br < 4, so uint8 is fine
   594  	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
   595  	for bitsLeft > 0 {
   596  		if br.bitsRead >= 64-8 {
   597  			for br.off > 0 {
   598  				br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
   599  				br.bitsRead -= 8
   600  				br.off--
   601  			}
   602  		}
   603  		if len(dst) >= maxDecodedSize {
   604  			d.bufs.Put(bufs)
   605  			br.close()
   606  			return nil, ErrMaxDecodedSizeExceeded
   607  		}
   608  		v := dt[br.peekByteFast()]
   609  		nBits := uint8(v.entry)
   610  		br.advance(nBits)
   611  		bitsLeft -= int8(nBits)
   612  		dst = append(dst, uint8(v.entry>>8))
   613  	}
   614  	d.bufs.Put(bufs)
   615  	return dst, br.close()
   616  }
   617  
   618  // Decompress4X will decompress a 4X encoded stream.
   619  // The length of the supplied input must match the end of a block exactly.
   620  // The *capacity* of the dst slice must match the destination size of
   621  // the uncompressed data exactly.
   622  func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
   623  	if d.actualTableLog == 8 {
   624  		return d.decompress4X8bitExactly(dst, src)
   625  	}
   626  
   627  	var br [4]bitReaderBytes
   628  	start := 6
   629  	for i := 0; i < 3; i++ {
   630  		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
   631  		if start+length >= len(src) {
   632  			return nil, errors.New("truncated input (or invalid offset)")
   633  		}
   634  		err := br[i].init(src[start : start+length])
   635  		if err != nil {
   636  			return nil, err
   637  		}
   638  		start += length
   639  	}
   640  	err := br[3].init(src[start:])
   641  	if err != nil {
   642  		return nil, err
   643  	}
   644  
   645  	// destination, offset to match first output
   646  	dstSize := cap(dst)
   647  	dst = dst[:dstSize]
   648  	out := dst
   649  	dstEvery := (dstSize + 3) / 4
   650  
   651  	shift := (56 + (8 - d.actualTableLog)) & 63
   652  
   653  	const tlSize = 1 << 8
   654  	single := d.dt.single[:tlSize]
   655  
   656  	// Use temp table to avoid bound checks/append penalty.
   657  	buf := d.buffer()
   658  	var off uint8
   659  	var decoded int
   660  
   661  	// Decode 4 values from each decoder/loop.
   662  	const bufoff = 256
   663  	for {
   664  		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
   665  			break
   666  		}
   667  
   668  		{
   669  			// Interleave 2 decodes.
   670  			const stream = 0
   671  			const stream2 = 1
   672  			br1 := &br[stream]
   673  			br2 := &br[stream2]
   674  			br1.fillFast()
   675  			br2.fillFast()
   676  
   677  			v := single[uint8(br1.value>>shift)].entry
   678  			v2 := single[uint8(br2.value>>shift)].entry
   679  			br1.bitsRead += uint8(v)
   680  			br1.value <<= v & 63
   681  			br2.bitsRead += uint8(v2)
   682  			br2.value <<= v2 & 63
   683  			buf[stream][off] = uint8(v >> 8)
   684  			buf[stream2][off] = uint8(v2 >> 8)
   685  
   686  			v = single[uint8(br1.value>>shift)].entry
   687  			v2 = single[uint8(br2.value>>shift)].entry
   688  			br1.bitsRead += uint8(v)
   689  			br1.value <<= v & 63
   690  			br2.bitsRead += uint8(v2)
   691  			br2.value <<= v2 & 63
   692  			buf[stream][off+1] = uint8(v >> 8)
   693  			buf[stream2][off+1] = uint8(v2 >> 8)
   694  
   695  			v = single[uint8(br1.value>>shift)].entry
   696  			v2 = single[uint8(br2.value>>shift)].entry
   697  			br1.bitsRead += uint8(v)
   698  			br1.value <<= v & 63
   699  			br2.bitsRead += uint8(v2)
   700  			br2.value <<= v2 & 63
   701  			buf[stream][off+2] = uint8(v >> 8)
   702  			buf[stream2][off+2] = uint8(v2 >> 8)
   703  
   704  			v = single[uint8(br1.value>>shift)].entry
   705  			v2 = single[uint8(br2.value>>shift)].entry
   706  			br1.bitsRead += uint8(v)
   707  			br1.value <<= v & 63
   708  			br2.bitsRead += uint8(v2)
   709  			br2.value <<= v2 & 63
   710  			buf[stream][off+3] = uint8(v >> 8)
   711  			buf[stream2][off+3] = uint8(v2 >> 8)
   712  		}
   713  
   714  		{
   715  			const stream = 2
   716  			const stream2 = 3
   717  			br1 := &br[stream]
   718  			br2 := &br[stream2]
   719  			br1.fillFast()
   720  			br2.fillFast()
   721  
   722  			v := single[uint8(br1.value>>shift)].entry
   723  			v2 := single[uint8(br2.value>>shift)].entry
   724  			br1.bitsRead += uint8(v)
   725  			br1.value <<= v & 63
   726  			br2.bitsRead += uint8(v2)
   727  			br2.value <<= v2 & 63
   728  			buf[stream][off] = uint8(v >> 8)
   729  			buf[stream2][off] = uint8(v2 >> 8)
   730  
   731  			v = single[uint8(br1.value>>shift)].entry
   732  			v2 = single[uint8(br2.value>>shift)].entry
   733  			br1.bitsRead += uint8(v)
   734  			br1.value <<= v & 63
   735  			br2.bitsRead += uint8(v2)
   736  			br2.value <<= v2 & 63
   737  			buf[stream][off+1] = uint8(v >> 8)
   738  			buf[stream2][off+1] = uint8(v2 >> 8)
   739  
   740  			v = single[uint8(br1.value>>shift)].entry
   741  			v2 = single[uint8(br2.value>>shift)].entry
   742  			br1.bitsRead += uint8(v)
   743  			br1.value <<= v & 63
   744  			br2.bitsRead += uint8(v2)
   745  			br2.value <<= v2 & 63
   746  			buf[stream][off+2] = uint8(v >> 8)
   747  			buf[stream2][off+2] = uint8(v2 >> 8)
   748  
   749  			v = single[uint8(br1.value>>shift)].entry
   750  			v2 = single[uint8(br2.value>>shift)].entry
   751  			br1.bitsRead += uint8(v)
   752  			br1.value <<= v & 63
   753  			br2.bitsRead += uint8(v2)
   754  			br2.value <<= v2 & 63
   755  			buf[stream][off+3] = uint8(v >> 8)
   756  			buf[stream2][off+3] = uint8(v2 >> 8)
   757  		}
   758  
   759  		off += 4
   760  
   761  		if off == 0 {
   762  			if bufoff > dstEvery {
   763  				d.bufs.Put(buf)
   764  				return nil, errors.New("corruption detected: stream overrun 1")
   765  			}
   766  			// There must at least be 3 buffers left.
   767  			if len(out)-bufoff < dstEvery*3 {
   768  				d.bufs.Put(buf)
   769  				return nil, errors.New("corruption detected: stream overrun 2")
   770  			}
   771  			//copy(out, buf[0][:])
   772  			//copy(out[dstEvery:], buf[1][:])
   773  			//copy(out[dstEvery*2:], buf[2][:])
   774  			*(*[bufoff]byte)(out) = buf[0]
   775  			*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
   776  			*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
   777  			*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
   778  			out = out[bufoff:]
   779  			decoded += bufoff * 4
   780  		}
   781  	}
   782  	if off > 0 {
   783  		ioff := int(off)
   784  		if len(out) < dstEvery*3+ioff {
   785  			d.bufs.Put(buf)
   786  			return nil, errors.New("corruption detected: stream overrun 3")
   787  		}
   788  		copy(out, buf[0][:off])
   789  		copy(out[dstEvery:], buf[1][:off])
   790  		copy(out[dstEvery*2:], buf[2][:off])
   791  		copy(out[dstEvery*3:], buf[3][:off])
   792  		decoded += int(off) * 4
   793  		out = out[off:]
   794  	}
   795  
   796  	// Decode remaining.
   797  	// Decode remaining.
   798  	remainBytes := dstEvery - (decoded / 4)
   799  	for i := range br {
   800  		offset := dstEvery * i
   801  		endsAt := offset + remainBytes
   802  		if endsAt > len(out) {
   803  			endsAt = len(out)
   804  		}
   805  		br := &br[i]
   806  		bitsLeft := br.remaining()
   807  		for bitsLeft > 0 {
   808  			if br.finished() {
   809  				d.bufs.Put(buf)
   810  				return nil, io.ErrUnexpectedEOF
   811  			}
   812  			if br.bitsRead >= 56 {
   813  				if br.off >= 4 {
   814  					v := br.in[br.off-4:]
   815  					v = v[:4]
   816  					low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
   817  					br.value |= uint64(low) << (br.bitsRead - 32)
   818  					br.bitsRead -= 32
   819  					br.off -= 4
   820  				} else {
   821  					for br.off > 0 {
   822  						br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
   823  						br.bitsRead -= 8
   824  						br.off--
   825  					}
   826  				}
   827  			}
   828  			// end inline...
   829  			if offset >= endsAt {
   830  				d.bufs.Put(buf)
   831  				return nil, errors.New("corruption detected: stream overrun 4")
   832  			}
   833  
   834  			// Read value and increment offset.
   835  			v := single[uint8(br.value>>shift)].entry
   836  			nBits := uint8(v)
   837  			br.advance(nBits)
   838  			bitsLeft -= uint(nBits)
   839  			out[offset] = uint8(v >> 8)
   840  			offset++
   841  		}
   842  		if offset != endsAt {
   843  			d.bufs.Put(buf)
   844  			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
   845  		}
   846  		decoded += offset - dstEvery*i
   847  		err = br.close()
   848  		if err != nil {
   849  			d.bufs.Put(buf)
   850  			return nil, err
   851  		}
   852  	}
   853  	d.bufs.Put(buf)
   854  	if dstSize != decoded {
   855  		return nil, errors.New("corruption detected: short output block")
   856  	}
   857  	return dst, nil
   858  }
   859  
   860  // Decompress4X will decompress a 4X encoded stream.
   861  // The length of the supplied input must match the end of a block exactly.
   862  // The *capacity* of the dst slice must match the destination size of
   863  // the uncompressed data exactly.
   864  func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
   865  	var br [4]bitReaderBytes
   866  	start := 6
   867  	for i := 0; i < 3; i++ {
   868  		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
   869  		if start+length >= len(src) {
   870  			return nil, errors.New("truncated input (or invalid offset)")
   871  		}
   872  		err := br[i].init(src[start : start+length])
   873  		if err != nil {
   874  			return nil, err
   875  		}
   876  		start += length
   877  	}
   878  	err := br[3].init(src[start:])
   879  	if err != nil {
   880  		return nil, err
   881  	}
   882  
   883  	// destination, offset to match first output
   884  	dstSize := cap(dst)
   885  	dst = dst[:dstSize]
   886  	out := dst
   887  	dstEvery := (dstSize + 3) / 4
   888  
   889  	const shift = 56
   890  	const tlSize = 1 << 8
   891  	single := d.dt.single[:tlSize]
   892  
   893  	// Use temp table to avoid bound checks/append penalty.
   894  	buf := d.buffer()
   895  	var off uint8
   896  	var decoded int
   897  
   898  	// Decode 4 values from each decoder/loop.
   899  	const bufoff = 256
   900  	for {
   901  		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
   902  			break
   903  		}
   904  
   905  		{
   906  			// Interleave 2 decodes.
   907  			const stream = 0
   908  			const stream2 = 1
   909  			br1 := &br[stream]
   910  			br2 := &br[stream2]
   911  			br1.fillFast()
   912  			br2.fillFast()
   913  
   914  			v := single[uint8(br1.value>>shift)].entry
   915  			v2 := single[uint8(br2.value>>shift)].entry
   916  			br1.bitsRead += uint8(v)
   917  			br1.value <<= v & 63
   918  			br2.bitsRead += uint8(v2)
   919  			br2.value <<= v2 & 63
   920  			buf[stream][off] = uint8(v >> 8)
   921  			buf[stream2][off] = uint8(v2 >> 8)
   922  
   923  			v = single[uint8(br1.value>>shift)].entry
   924  			v2 = single[uint8(br2.value>>shift)].entry
   925  			br1.bitsRead += uint8(v)
   926  			br1.value <<= v & 63
   927  			br2.bitsRead += uint8(v2)
   928  			br2.value <<= v2 & 63
   929  			buf[stream][off+1] = uint8(v >> 8)
   930  			buf[stream2][off+1] = uint8(v2 >> 8)
   931  
   932  			v = single[uint8(br1.value>>shift)].entry
   933  			v2 = single[uint8(br2.value>>shift)].entry
   934  			br1.bitsRead += uint8(v)
   935  			br1.value <<= v & 63
   936  			br2.bitsRead += uint8(v2)
   937  			br2.value <<= v2 & 63
   938  			buf[stream][off+2] = uint8(v >> 8)
   939  			buf[stream2][off+2] = uint8(v2 >> 8)
   940  
   941  			v = single[uint8(br1.value>>shift)].entry
   942  			v2 = single[uint8(br2.value>>shift)].entry
   943  			br1.bitsRead += uint8(v)
   944  			br1.value <<= v & 63
   945  			br2.bitsRead += uint8(v2)
   946  			br2.value <<= v2 & 63
   947  			buf[stream][off+3] = uint8(v >> 8)
   948  			buf[stream2][off+3] = uint8(v2 >> 8)
   949  		}
   950  
   951  		{
   952  			const stream = 2
   953  			const stream2 = 3
   954  			br1 := &br[stream]
   955  			br2 := &br[stream2]
   956  			br1.fillFast()
   957  			br2.fillFast()
   958  
   959  			v := single[uint8(br1.value>>shift)].entry
   960  			v2 := single[uint8(br2.value>>shift)].entry
   961  			br1.bitsRead += uint8(v)
   962  			br1.value <<= v & 63
   963  			br2.bitsRead += uint8(v2)
   964  			br2.value <<= v2 & 63
   965  			buf[stream][off] = uint8(v >> 8)
   966  			buf[stream2][off] = uint8(v2 >> 8)
   967  
   968  			v = single[uint8(br1.value>>shift)].entry
   969  			v2 = single[uint8(br2.value>>shift)].entry
   970  			br1.bitsRead += uint8(v)
   971  			br1.value <<= v & 63
   972  			br2.bitsRead += uint8(v2)
   973  			br2.value <<= v2 & 63
   974  			buf[stream][off+1] = uint8(v >> 8)
   975  			buf[stream2][off+1] = uint8(v2 >> 8)
   976  
   977  			v = single[uint8(br1.value>>shift)].entry
   978  			v2 = single[uint8(br2.value>>shift)].entry
   979  			br1.bitsRead += uint8(v)
   980  			br1.value <<= v & 63
   981  			br2.bitsRead += uint8(v2)
   982  			br2.value <<= v2 & 63
   983  			buf[stream][off+2] = uint8(v >> 8)
   984  			buf[stream2][off+2] = uint8(v2 >> 8)
   985  
   986  			v = single[uint8(br1.value>>shift)].entry
   987  			v2 = single[uint8(br2.value>>shift)].entry
   988  			br1.bitsRead += uint8(v)
   989  			br1.value <<= v & 63
   990  			br2.bitsRead += uint8(v2)
   991  			br2.value <<= v2 & 63
   992  			buf[stream][off+3] = uint8(v >> 8)
   993  			buf[stream2][off+3] = uint8(v2 >> 8)
   994  		}
   995  
   996  		off += 4
   997  
   998  		if off == 0 {
   999  			if bufoff > dstEvery {
  1000  				d.bufs.Put(buf)
  1001  				return nil, errors.New("corruption detected: stream overrun 1")
  1002  			}
  1003  			// There must at least be 3 buffers left.
  1004  			if len(out)-bufoff < dstEvery*3 {
  1005  				d.bufs.Put(buf)
  1006  				return nil, errors.New("corruption detected: stream overrun 2")
  1007  			}
  1008  
  1009  			//copy(out, buf[0][:])
  1010  			//copy(out[dstEvery:], buf[1][:])
  1011  			//copy(out[dstEvery*2:], buf[2][:])
  1012  			// copy(out[dstEvery*3:], buf[3][:])
  1013  			*(*[bufoff]byte)(out) = buf[0]
  1014  			*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
  1015  			*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
  1016  			*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
  1017  			out = out[bufoff:]
  1018  			decoded += bufoff * 4
  1019  		}
  1020  	}
  1021  	if off > 0 {
  1022  		ioff := int(off)
  1023  		if len(out) < dstEvery*3+ioff {
  1024  			return nil, errors.New("corruption detected: stream overrun 3")
  1025  		}
  1026  		copy(out, buf[0][:off])
  1027  		copy(out[dstEvery:], buf[1][:off])
  1028  		copy(out[dstEvery*2:], buf[2][:off])
  1029  		copy(out[dstEvery*3:], buf[3][:off])
  1030  		decoded += int(off) * 4
  1031  		out = out[off:]
  1032  	}
  1033  
  1034  	// Decode remaining.
  1035  	remainBytes := dstEvery - (decoded / 4)
  1036  	for i := range br {
  1037  		offset := dstEvery * i
  1038  		endsAt := offset + remainBytes
  1039  		if endsAt > len(out) {
  1040  			endsAt = len(out)
  1041  		}
  1042  		br := &br[i]
  1043  		bitsLeft := br.remaining()
  1044  		for bitsLeft > 0 {
  1045  			if br.finished() {
  1046  				d.bufs.Put(buf)
  1047  				return nil, io.ErrUnexpectedEOF
  1048  			}
  1049  			if br.bitsRead >= 56 {
  1050  				if br.off >= 4 {
  1051  					v := br.in[br.off-4:]
  1052  					v = v[:4]
  1053  					low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
  1054  					br.value |= uint64(low) << (br.bitsRead - 32)
  1055  					br.bitsRead -= 32
  1056  					br.off -= 4
  1057  				} else {
  1058  					for br.off > 0 {
  1059  						br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  1060  						br.bitsRead -= 8
  1061  						br.off--
  1062  					}
  1063  				}
  1064  			}
  1065  			// end inline...
  1066  			if offset >= endsAt {
  1067  				d.bufs.Put(buf)
  1068  				return nil, errors.New("corruption detected: stream overrun 4")
  1069  			}
  1070  
  1071  			// Read value and increment offset.
  1072  			v := single[br.peekByteFast()].entry
  1073  			nBits := uint8(v)
  1074  			br.advance(nBits)
  1075  			bitsLeft -= uint(nBits)
  1076  			out[offset] = uint8(v >> 8)
  1077  			offset++
  1078  		}
  1079  		if offset != endsAt {
  1080  			d.bufs.Put(buf)
  1081  			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
  1082  		}
  1083  
  1084  		decoded += offset - dstEvery*i
  1085  		err = br.close()
  1086  		if err != nil {
  1087  			d.bufs.Put(buf)
  1088  			return nil, err
  1089  		}
  1090  	}
  1091  	d.bufs.Put(buf)
  1092  	if dstSize != decoded {
  1093  		return nil, errors.New("corruption detected: short output block")
  1094  	}
  1095  	return dst, nil
  1096  }
  1097  
  1098  // matches will compare a decoding table to a coding table.
  1099  // Errors are written to the writer.
  1100  // Nothing will be written if table is ok.
  1101  func (s *Scratch) matches(ct cTable, w io.Writer) {
  1102  	if s == nil || len(s.dt.single) == 0 {
  1103  		return
  1104  	}
  1105  	dt := s.dt.single[:1<<s.actualTableLog]
  1106  	tablelog := s.actualTableLog
  1107  	ok := 0
  1108  	broken := 0
  1109  	for sym, enc := range ct {
  1110  		errs := 0
  1111  		broken++
  1112  		if enc.nBits == 0 {
  1113  			for _, dec := range dt {
  1114  				if uint8(dec.entry>>8) == byte(sym) {
  1115  					fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
  1116  					errs++
  1117  					break
  1118  				}
  1119  			}
  1120  			if errs == 0 {
  1121  				broken--
  1122  			}
  1123  			continue
  1124  		}
  1125  		// Unused bits in input
  1126  		ub := tablelog - enc.nBits
  1127  		top := enc.val << ub
  1128  		// decoder looks at top bits.
  1129  		dec := dt[top]
  1130  		if uint8(dec.entry) != enc.nBits {
  1131  			fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
  1132  			errs++
  1133  		}
  1134  		if uint8(dec.entry>>8) != uint8(sym) {
  1135  			fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
  1136  			errs++
  1137  		}
  1138  		if errs > 0 {
  1139  			fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
  1140  			continue
  1141  		}
  1142  		// Ensure that all combinations are covered.
  1143  		for i := uint16(0); i < (1 << ub); i++ {
  1144  			vval := top | i
  1145  			dec := dt[vval]
  1146  			if uint8(dec.entry) != enc.nBits {
  1147  				fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
  1148  				errs++
  1149  			}
  1150  			if uint8(dec.entry>>8) != uint8(sym) {
  1151  				fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
  1152  				errs++
  1153  			}
  1154  			if errs > 20 {
  1155  				fmt.Fprintf(w, "%d errros, stopping\n", errs)
  1156  				break
  1157  			}
  1158  		}
  1159  		if errs == 0 {
  1160  			ok++
  1161  			broken--
  1162  		}
  1163  	}
  1164  	if broken > 0 {
  1165  		fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
  1166  	}
  1167  }