github.com/bir3/gocompiler@v0.9.2202/extra/compress/fse/fse.go (about)

     1  // Copyright 2018 Klaus Post. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
     5  
     6  // Package fse provides Finite State Entropy encoding and decoding.
     7  //
     8  // Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
     9  // for byte blocks as implemented in zstd.
    10  //
    11  // See https://github.com/klauspost/compress/tree/master/fse for more information.
    12  package fse
    13  
    14  import (
    15  	"errors"
    16  	"fmt"
    17  	"math/bits"
    18  )
    19  
    20  const (
    21  	/*!MEMORY_USAGE :
    22  	 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
    23  	 *  Increasing memory usage improves compression ratio
    24  	 *  Reduced memory usage can improve speed, due to cache effect
    25  	 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
    26  	maxMemoryUsage     = 14
    27  	defaultMemoryUsage = 13
    28  
    29  	maxTableLog     = maxMemoryUsage - 2
    30  	maxTablesize    = 1 << maxTableLog
    31  	defaultTablelog = defaultMemoryUsage - 2
    32  	minTablelog     = 5
    33  	maxSymbolValue  = 255
    34  )
    35  
    36  var (
    37  	// ErrIncompressible is returned when input is judged to be too hard to compress.
    38  	ErrIncompressible = errors.New("input is not compressible")
    39  
    40  	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
    41  	ErrUseRLE = errors.New("input is single value repeated")
    42  )
    43  
    44  // Scratch provides temporary storage for compression and decompression.
    45  type Scratch struct {
    46  	// Private
    47  	count    [maxSymbolValue + 1]uint32
    48  	norm     [maxSymbolValue + 1]int16
    49  	br       byteReader
    50  	bits     bitReader
    51  	bw       bitWriter
    52  	ct       cTable      // Compression tables.
    53  	decTable []decSymbol // Decompression table.
    54  	maxCount int         // count of the most probable symbol
    55  
    56  	// Per block parameters.
    57  	// These can be used to override compression parameters of the block.
    58  	// Do not touch, unless you know what you are doing.
    59  
    60  	// Out is output buffer.
    61  	// If the scratch is re-used before the caller is done processing the output,
    62  	// set this field to nil.
    63  	// Otherwise the output buffer will be re-used for next Compression/Decompression step
    64  	// and allocation will be avoided.
    65  	Out []byte
    66  
    67  	// DecompressLimit limits the maximum decoded size acceptable.
    68  	// If > 0 decompression will stop when approximately this many bytes
    69  	// has been decoded.
    70  	// If 0, maximum size will be 2GB.
    71  	DecompressLimit int
    72  
    73  	symbolLen      uint16 // Length of active part of the symbol table.
    74  	actualTableLog uint8  // Selected tablelog.
    75  	zeroBits       bool   // no bits has prob > 50%.
    76  	clearCount     bool   // clear count
    77  
    78  	// MaxSymbolValue will override the maximum symbol value of the next block.
    79  	MaxSymbolValue uint8
    80  
    81  	// TableLog will attempt to override the tablelog for the next block.
    82  	TableLog uint8
    83  }
    84  
    85  // Histogram allows to populate the histogram and skip that step in the compression,
    86  // It otherwise allows to inspect the histogram when compression is done.
    87  // To indicate that you have populated the histogram call HistogramFinished
    88  // with the value of the highest populated symbol, as well as the number of entries
    89  // in the most populated entry. These are accepted at face value.
    90  // The returned slice will always be length 256.
    91  func (s *Scratch) Histogram() []uint32 {
    92  	return s.count[:]
    93  }
    94  
    95  // HistogramFinished can be called to indicate that the histogram has been populated.
    96  // maxSymbol is the index of the highest set symbol of the next data segment.
    97  // maxCount is the number of entries in the most populated entry.
    98  // These are accepted at face value.
    99  func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
   100  	s.maxCount = maxCount
   101  	s.symbolLen = uint16(maxSymbol) + 1
   102  	s.clearCount = maxCount != 0
   103  }
   104  
   105  // prepare will prepare and allocate scratch tables used for both compression and decompression.
   106  func (s *Scratch) prepare(in []byte) (*Scratch, error) {
   107  	if s == nil {
   108  		s = &Scratch{}
   109  	}
   110  	if s.MaxSymbolValue == 0 {
   111  		s.MaxSymbolValue = 255
   112  	}
   113  	if s.TableLog == 0 {
   114  		s.TableLog = defaultTablelog
   115  	}
   116  	if s.TableLog > maxTableLog {
   117  		return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
   118  	}
   119  	if cap(s.Out) == 0 {
   120  		s.Out = make([]byte, 0, len(in))
   121  	}
   122  	if s.clearCount && s.maxCount == 0 {
   123  		for i := range s.count {
   124  			s.count[i] = 0
   125  		}
   126  		s.clearCount = false
   127  	}
   128  	s.br.init(in)
   129  	if s.DecompressLimit == 0 {
   130  		// Max size 2GB.
   131  		s.DecompressLimit = (2 << 30) - 1
   132  	}
   133  
   134  	return s, nil
   135  }
   136  
   137  // tableStep returns the next table index.
   138  func tableStep(tableSize uint32) uint32 {
   139  	return (tableSize >> 1) + (tableSize >> 3) + 3
   140  }
   141  
   142  func highBits(val uint32) (n uint32) {
   143  	return uint32(bits.Len32(val) - 1)
   144  }