// Origin: github.com/arduino/arduino-cloud-cli@v0.0.0-20240517070944-e7a449561083/internal/lzss/lzss.go

// This code is a go port of LZSS encoder-decoder (Haruhiko Okumura; public domain)
//
// This file is part of arduino-cloud-cli.
//
// Copyright (C) 2021 ARDUINO SA (http://www.arduino.cc/)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package lzss

import (
	"bytes"
)

const (
	idxsz = 11 // Size of buffer indexes in bits, typically 10..13 bits.
	lensz = 4  // Size of lookahead indexes in bits, typically 4..5 bits.

	charsz   = 8   // Size of encoded chars in bits.
	bytemask = 128 // Mask selecting the most significant bit of a byte; output bits are written starting from it.

	threshold = 1 // If match length > threshold then output a token (idx, len), otherwise output one char.

	bufsz     = 1 << idxsz       // Buffer size; the encoder works on a buffer twice this large.
	looksz    = (1 << lensz) + 1 // Lookahead buffer size, i.e. the longest match that is searched for.
	historysz = bufsz - looksz   // History buffer size: how far back matches are searched.

	charStartBit  = true  // Flag bit indicating that the next bits encode a char.
	tokenStartBit = false // Flag bit indicating that the next bits encode a token.
)

// Encode takes a slice of bytes, compresses it using the lzss compression algorithm
// and returns the compressed bytes.
45 func Encode(data []byte) []byte { 46 // buffer is made up of two parts: the first is for already processed data (history); the second is for new data 47 buffer := make([]byte, bufsz*2) 48 // Initialize the old-data part (history) of the buffer 49 for i := 0; i < historysz; i++ { 50 buffer[i] = ' ' 51 } 52 out := newResult() 53 in := newFiller(data) 54 55 // Fill the new-data part of the buffer 56 n := in.fill(buffer[historysz:]) 57 bufferend := historysz + n 58 for current := historysz; current < bufferend; { 59 idx, len := findLargestMatch(buffer, current, bufferend) 60 if len <= threshold { 61 out.addChar(buffer[current]) 62 len = 1 63 } else { 64 out.addToken(idx, len) 65 } 66 67 current += len 68 if current >= bufsz*2-looksz { 69 // Shift processed bytes to the old-data portion of the buffer 70 copy(buffer[:bufsz], buffer[bufsz:]) 71 current -= bufsz 72 // Refill the new-data portion of the buffer 73 bufferend -= bufsz 74 bufferend += in.fill(buffer[bufferend:]) 75 } 76 } 77 78 out.flush() 79 return out.bytes() 80 } 81 82 func min(x, y int) int { 83 if x < y { 84 return x 85 } 86 return y 87 } 88 89 // findLargestMatch looks for the largest sequence of characters (from current to current+ahead) 90 // contained in the history of the buffer. 91 // It returns the index of the found match, if any, and its length. 92 // The index is relative to the current position. If idx 0 is returned than no match has been found. 93 func findLargestMatch(buf []byte, current, size int) (idx, len int) { 94 idx = 0 95 len = 1 96 ahead := min(looksz, size-current) 97 history := current - historysz 98 c := buf[current] 99 for i := current - 1; i >= history; i-- { 100 if buf[i] == c { 101 var j int 102 for j = 1; j < ahead; j++ { 103 if buf[i+j] != buf[current+j] { 104 break 105 } 106 } 107 if j > len { 108 idx = i 109 len = j 110 } 111 } 112 } 113 return 114 } 115 116 // filler abstracts the process of consuming an input buffer 117 // using its bytes to fill another buffer. 
118 // It's been used to facilitate the handling of the input buffer in the Encode function. 119 type filler struct { 120 src []byte 121 idx int 122 } 123 124 func newFiller(src []byte) *filler { 125 return &filler{ 126 src: src, 127 } 128 } 129 130 // fill tries to fill all the dst buffer with bytes read from src. 131 // It returns the number of bytes moved from src to dst. 132 // The src buffer offset is then incremented so that all the content of src 133 // can be consumed in small chunks. 134 func (f *filler) fill(dst []byte) int { 135 n := copy(dst, f.src[f.idx:]) 136 f.idx += n 137 return n 138 } 139 140 // result is responsible for storing the actual result of the encoding. 141 // It knows how to store characters and tokens in the resulting buffer. 142 // It must be flushed at the end of the encoding in order to store the 143 // remaining bits of bitBuffer. 144 type result struct { 145 bitBuffer int 146 bitMask int 147 out *bytes.Buffer 148 } 149 150 func newResult() *result { 151 return &result{ 152 bitBuffer: 0, 153 bitMask: bytemask, 154 out: &bytes.Buffer{}, 155 } 156 } 157 158 // addChar stores a char in the out buffer. 159 func (r *result) addChar(c byte) { 160 i := int(c) 161 r.putbit(charStartBit) 162 for mask := (1 << charsz) >> 1; mask != 0; mask = mask >> 1 { 163 b := (i & mask) != 0 164 r.putbit(b) 165 } 166 } 167 168 // addToken stores a token in the out buffer. 
169 func (r *result) addToken(idx, len int) { 170 // Adjust idx and len to fit idxsz and lensz bits respectively 171 idx &= bufsz - 1 172 len -= 2 173 174 r.putbit(tokenStartBit) 175 for mask := (1 << idxsz) >> 1; mask != 0; mask = mask >> 1 { 176 b := idx&mask != 0 177 r.putbit(b) 178 } 179 180 for mask := (1 << lensz) >> 1; mask != 0; mask = mask >> 1 { 181 b := len&mask != 0 182 r.putbit(b) 183 } 184 } 185 186 func (r *result) flush() { 187 if r.bitMask != bytemask { 188 r.out.WriteByte(byte(r.bitBuffer)) 189 } 190 } 191 192 // putbit puts the passed bit (true -> 1; false -> 0) in the bitBuffer. 193 // When bitBuffer contains an entire byte it's written to the out buffer. 194 func (r *result) putbit(b bool) { 195 if b { 196 r.bitBuffer |= r.bitMask 197 } 198 r.bitMask = r.bitMask >> 1 199 if r.bitMask == 0 { 200 r.out.WriteByte(byte(r.bitBuffer)) 201 r.bitBuffer = 0 202 r.bitMask = bytemask 203 } 204 } 205 206 func (r *result) bytes() []byte { 207 return r.out.Bytes() 208 }