github.com/arduino/arduino-cloud-cli@v0.0.0-20240517070944-e7a449561083/internal/lzss/lzss.go (about)

     1  // This code is a go port of LZSS encoder-decoder (Haruhiko Okumura; public domain)
     2  //
     3  // This file is part of arduino-cloud-cli.
     4  //
     5  // Copyright (C) 2021 ARDUINO SA (http://www.arduino.cc/)
     6  //
     7  // This program is free software: you can redistribute it and/or modify
     8  // it under the terms of the GNU Affero General Public License as published
     9  // by the Free Software Foundation, either version 3 of the License, or
    10  // (at your option) any later version.
    11  //
    12  // This program is distributed in the hope that it will be useful,
    13  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    14  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    15  // GNU Affero General Public License for more details.
    16  //
    17  // You should have received a copy of the GNU Affero General Public License
    18  // along with this program.  If not, see <https://www.gnu.org/licenses/>.
    19  
    20  package lzss
    21  
    22  import (
    23  	"bytes"
    24  )
    25  
    26  const (
    27  	idxsz = 11 // Size of buffer indexes in bits, typically 10..13 bits.
    28  	lensz = 4  // Size of lookahead indexes in bits, typically 4..5 bits.
    29  
    30  	charsz   = 8   // Size of encoded chars in bits.
    31  	bytemask = 128 // Mask with a bit in 8th position. Used to iterate through bits of a char.
    32  
    33  	threshold = 1 // If match length > threshold then output a token (idx, len), otherwise output one char.
    34  
    35  	bufsz     = 1 << idxsz       // Buffer size.
    36  	looksz    = (1 << lensz) + 1 // Lookahead buffer size.
    37  	historysz = bufsz - looksz   // History buffer size.
    38  
    39  	charStartBit  = true  // Indicates next bits encode a char.
    40  	tokenStartBit = false // Indicates next bits encode a token.
    41  )
    42  
    43  // Encode takes a slice of bytes, compresses it using the lzss compression algorithm
    44  // and returns the result in a new bytes buffer.
    45  func Encode(data []byte) []byte {
    46  	// buffer is made up of two parts: the first is for already processed data (history); the second is for new data
    47  	buffer := make([]byte, bufsz*2)
    48  	// Initialize the old-data part (history) of the buffer
    49  	for i := 0; i < historysz; i++ {
    50  		buffer[i] = ' '
    51  	}
    52  	out := newResult()
    53  	in := newFiller(data)
    54  
    55  	// Fill the new-data part of the buffer
    56  	n := in.fill(buffer[historysz:])
    57  	bufferend := historysz + n
    58  	for current := historysz; current < bufferend; {
    59  		idx, len := findLargestMatch(buffer, current, bufferend)
    60  		if len <= threshold {
    61  			out.addChar(buffer[current])
    62  			len = 1
    63  		} else {
    64  			out.addToken(idx, len)
    65  		}
    66  
    67  		current += len
    68  		if current >= bufsz*2-looksz {
    69  			// Shift processed bytes to the old-data portion of the buffer
    70  			copy(buffer[:bufsz], buffer[bufsz:])
    71  			current -= bufsz
    72  			// Refill the new-data portion of the buffer
    73  			bufferend -= bufsz
    74  			bufferend += in.fill(buffer[bufferend:])
    75  		}
    76  	}
    77  
    78  	out.flush()
    79  	return out.bytes()
    80  }
    81  
    82  func min(x, y int) int {
    83  	if x < y {
    84  		return x
    85  	}
    86  	return y
    87  }
    88  
    89  // findLargestMatch looks for the largest sequence of characters (from current to current+ahead)
    90  // contained in the history of the buffer.
    91  // It returns the index of the found match, if any, and its length.
    92  // The index is relative to the current position. If idx 0 is returned than no match has been found.
    93  func findLargestMatch(buf []byte, current, size int) (idx, len int) {
    94  	idx = 0
    95  	len = 1
    96  	ahead := min(looksz, size-current)
    97  	history := current - historysz
    98  	c := buf[current]
    99  	for i := current - 1; i >= history; i-- {
   100  		if buf[i] == c {
   101  			var j int
   102  			for j = 1; j < ahead; j++ {
   103  				if buf[i+j] != buf[current+j] {
   104  					break
   105  				}
   106  			}
   107  			if j > len {
   108  				idx = i
   109  				len = j
   110  			}
   111  		}
   112  	}
   113  	return
   114  }
   115  
   116  // filler abstracts the process of consuming an input buffer
   117  // using its bytes to fill another buffer.
   118  // It's been used to facilitate the handling of the input buffer in the Encode function.
   119  type filler struct {
   120  	src []byte
   121  	idx int
   122  }
   123  
   124  func newFiller(src []byte) *filler {
   125  	return &filler{
   126  		src: src,
   127  	}
   128  }
   129  
   130  // fill tries to fill all the dst buffer with bytes read from src.
   131  // It returns the number of bytes moved from src to dst.
   132  // The src buffer offset is then incremented so that all the content of src
   133  // can be consumed in small chunks.
   134  func (f *filler) fill(dst []byte) int {
   135  	n := copy(dst, f.src[f.idx:])
   136  	f.idx += n
   137  	return n
   138  }
   139  
   140  // result is responsible for storing the actual result of the encoding.
   141  // It knows how to store characters and tokens in the resulting buffer.
   142  // It must be flushed at the end of the encoding in order to store the
   143  // remaining bits of bitBuffer.
   144  type result struct {
   145  	bitBuffer int
   146  	bitMask   int
   147  	out       *bytes.Buffer
   148  }
   149  
   150  func newResult() *result {
   151  	return &result{
   152  		bitBuffer: 0,
   153  		bitMask:   bytemask,
   154  		out:       &bytes.Buffer{},
   155  	}
   156  }
   157  
   158  // addChar stores a char in the out buffer.
   159  func (r *result) addChar(c byte) {
   160  	i := int(c)
   161  	r.putbit(charStartBit)
   162  	for mask := (1 << charsz) >> 1; mask != 0; mask = mask >> 1 {
   163  		b := (i & mask) != 0
   164  		r.putbit(b)
   165  	}
   166  }
   167  
   168  // addToken stores a token in the out buffer.
   169  func (r *result) addToken(idx, len int) {
   170  	// Adjust idx and len to fit idxsz and lensz bits respectively
   171  	idx &= bufsz - 1
   172  	len -= 2
   173  
   174  	r.putbit(tokenStartBit)
   175  	for mask := (1 << idxsz) >> 1; mask != 0; mask = mask >> 1 {
   176  		b := idx&mask != 0
   177  		r.putbit(b)
   178  	}
   179  
   180  	for mask := (1 << lensz) >> 1; mask != 0; mask = mask >> 1 {
   181  		b := len&mask != 0
   182  		r.putbit(b)
   183  	}
   184  }
   185  
   186  func (r *result) flush() {
   187  	if r.bitMask != bytemask {
   188  		r.out.WriteByte(byte(r.bitBuffer))
   189  	}
   190  }
   191  
   192  // putbit puts the passed bit (true -> 1; false -> 0) in the bitBuffer.
   193  // When bitBuffer contains an entire byte it's written to the out buffer.
   194  func (r *result) putbit(b bool) {
   195  	if b {
   196  		r.bitBuffer |= r.bitMask
   197  	}
   198  	r.bitMask = r.bitMask >> 1
   199  	if r.bitMask == 0 {
   200  		r.out.WriteByte(byte(r.bitBuffer))
   201  		r.bitBuffer = 0
   202  		r.bitMask = bytemask
   203  	}
   204  }
   205  
   206  func (r *result) bytes() []byte {
   207  	return r.out.Bytes()
   208  }