github.com/nikandfor/tlog@v0.21.5-0.20231108111739-3ef89426a96d/tlz/encoder.go (about) 1 package tlz 2 3 import ( 4 "fmt" 5 "io" 6 "os" 7 "unsafe" 8 ) 9 10 type ( 11 Encoder struct { 12 io.Writer 13 14 b []byte 15 written int64 16 17 block []byte 18 mask int 19 pos int64 20 21 ht []uint32 22 hsh uint 23 } 24 ) 25 26 // Byte multipliers. 27 const ( 28 B = 1 << (iota * 10) 29 KiB 30 MiB 31 GiB 32 ) 33 34 // Tags. 35 const ( 36 Literal = iota << 7 37 Copy 38 39 TagMask = 0b1000_0000 40 TagLenMask = 0b0111_1111 41 ) 42 43 // Tag lengths. 44 const ( 45 _ = 1<<7 - iota 46 Len8 47 Len4 48 Len2 49 Len1 50 51 Meta = 0 // Literal | Meta - means meta tag 52 ) 53 54 // Offset lengths. 55 const ( 56 _ = 1<<8 - iota 57 Off8 58 Off4 59 Off2 60 Off1 61 ) 62 63 // Meta tags. 64 const ( 65 // len: 1 2 4 8 16 32 64 Len1 66 67 MetaMagic = iota << 3 // 4: "tlz" Version 68 MetaReset // 1: block_size_log 69 70 MetaTagMask = 0b1111_1000 71 ) 72 73 const FileMagic = "\x00\x02eazy" 74 75 var zeros = make([]byte, 1024) 76 77 func NewEncoder(w io.Writer, bs int) *Encoder { 78 if bs&(bs-1) != 0 || bs < 256 { 79 panic("block size must be power of two and at least 1KB") 80 } 81 82 return NewEncoderHTSize(w, bs, bs>>6) 83 } 84 85 func newEncoder(w io.Writer, bs, ss int) *Encoder { 86 return NewEncoderHTSize(w, bs, bs>>ss) 87 } 88 89 func NewEncoderHTSize(w io.Writer, bs, hlen int) *Encoder { 90 if (bs-1)&bs != 0 { 91 panic("block size must be power of two and at least 1KB") 92 } 93 94 if (hlen-1)&hlen != 0 { 95 panic("hash table size must be power of two") 96 } 97 98 hsh := uint(2) 99 for 1<<(32-hsh) != hlen { 100 hsh++ 101 } 102 103 return &Encoder{ 104 Writer: w, 105 block: make([]byte, bs), 106 mask: bs - 1, 107 ht: make([]uint32, hlen), 108 hsh: hsh, 109 } 110 } 111 112 func (w *Encoder) Reset(wr io.Writer) { 113 w.Writer = wr 114 115 w.reset() 116 } 117 118 func (w *Encoder) reset() { 119 w.pos = 0 120 for i := 0; i < len(w.block); { 121 i += copy(w.block[i:], zeros) 122 } 123 for i := range w.ht { 124 w.ht[i] = 0 125 } 126 } 127 128 // Write is io.Writer implementation. 129 func (w *Encoder) Write(p []byte) (done int, err error) { //nolint:gocognit 130 w.b = w.b[:0] 131 132 if w.pos == 0 { 133 w.b = w.appendHeader(w.b) 134 } 135 136 start := int(w.pos) 137 138 for i := 0; i+4 < len(p); { 139 h := *(*uint32)(unsafe.Pointer(&p[i])) * 0x1e35a7bd >> w.hsh 140 141 pos := int(w.ht[h]) 142 w.ht[h] = uint32(start + i) 143 144 if off := int(w.pos) - pos; off <= i-done+4 || off >= len(w.block) { 145 i++ 146 continue 147 } 148 149 // extend backward 150 151 ist := i - 1 152 st := pos - 1 153 154 for ist >= done && p[ist] == w.block[st&w.mask] { 155 ist-- 156 st-- 157 } 158 159 ist++ 160 st++ 161 162 // extend forward 163 164 iend := i 165 end := pos 166 167 for iend < len(p) && p[iend] == w.block[end&w.mask] { 168 iend++ 169 end++ 170 } 171 172 if end-st <= 4 { 173 i++ 174 continue 175 } 176 177 off := start + i - pos 178 lit := ist - done 179 cst := st + off 180 cend := end + off 181 182 if x := cend - len(w.block) - st; x > 0 { 183 // dpr("block long intersection: reduce end by %4x\n", x) 184 end -= x 185 iend -= x 186 } 187 188 if x := end - cst + lit; x > 0 { 189 // dpr("literal intersection: reduce end by %4x\n", x) 190 end -= x 191 iend -= x 192 193 /* 194 j := done 195 for iend < len(p) && j < ist && p[iend] == p[j] && end < cst && cend < st+len(w.block) { 196 iend++ 197 cend++ 198 end++ 199 j++ 200 } 201 202 dpr("literal intersection: added back %4x\n", j-done) 203 */ 204 } 205 206 if end-st <= 4 { 207 i++ 208 continue 209 } 210 211 cend = end + off 212 213 /* 214 dpr(""+ 215 "lit %4x %4x (%4x) pos %6x %6x blk %4x %4x %q\n"+ 216 "cpy %4x %4x (%4x) pos %6x %6x blk %4x %4x %q\n"+ 217 "i %4x pos %6x bck %6x %6x blk %4x %4x off %4x st %4x end %4x\n", 218 done, ist, lit, cst-lit, cst, (cst-lit)&w.mask, cst&w.mask, p[done:ist], 219 ist, iend, iend-ist, cst, cend, cst&w.mask, cend&w.mask, p[ist:iend], 220 i, pos, st, end, st&w.mask, end&w.mask, off, st-pos, end-pos, 221 ) 222 */ 223 224 if !(st&w.mask >= cend&w.mask || cst&w.mask >= end&w.mask) { 225 panic(pos) 226 } 227 228 if done < ist { 229 w.appendLiteral(p, done, ist) 230 } 231 232 w.appendCopy(st, end) 233 234 h = *(*uint32)(unsafe.Pointer(&p[i+1])) * 0x1e35a7bd >> w.hsh 235 w.ht[h] = uint32(start + i + 1) 236 237 i = iend 238 done = iend 239 } 240 241 if done < len(p) { 242 w.appendLiteral(p, done, len(p)) 243 244 done = len(p) 245 } 246 247 n, err := w.Writer.Write(w.b) 248 w.written += int64(n) 249 250 if err != nil || n != len(w.b) { 251 w.reset() 252 } 253 254 return done, err 255 } 256 257 func (w *Encoder) appendHeader(b []byte) []byte { 258 b = append(b, Literal|Meta, MetaMagic|2, 'e', 'a', 'z', 'y') 259 260 bs := 0 261 for q := len(w.block); q != 1; q >>= 1 { 262 bs++ 263 } 264 265 b = append(b, Literal|Meta, MetaReset|0, byte(bs)) 266 267 return b 268 } 269 270 func (w *Encoder) appendLiteral(d []byte, s, e int) { 271 w.b = w.appendTag(w.b, Literal, e-s) 272 w.b = append(w.b, d[s:e]...) 273 274 for s < e { 275 n := copy(w.block[int(w.pos)&w.mask:], d[s:e]) 276 s += n 277 w.pos += int64(n) 278 } 279 } 280 281 func (w *Encoder) appendCopy(st, end int) { 282 w.b = w.appendTag(w.b, Copy, end-st) 283 w.b = w.appendOff(w.b, int(w.pos)-end) 284 285 var n int 286 for st < end { 287 limit := len(w.block) 288 if st&w.mask < end&w.mask { 289 limit = end & w.mask 290 } 291 292 n = copy(w.block[int(w.pos)&w.mask:], w.block[st&w.mask:limit]) 293 st += n 294 w.pos += int64(n) 295 } 296 } 297 298 func (w *Encoder) appendTag(b []byte, tag byte, l int) []byte { 299 switch { 300 case l < Len1: 301 return append(b, tag|byte(l)) 302 case l <= 0xff: 303 return append(b, tag|Len1, byte(l)) 304 case l <= 0xffff: 305 return append(b, tag|Len2, byte(l>>8), byte(l)) 306 case l <= 0xffff_ffff: 307 return append(b, tag|Len4, byte(l>>24), byte(l>>16), byte(l>>8), byte(l)) 308 default: 309 return append(b, tag|Len8, byte(l>>56), byte(l>>48), byte(l>>40), byte(l>>32), byte(l>>24), byte(l>>16), byte(l>>8), byte(l)) 310 } 311 } 312 313 func (w *Encoder) appendOff(b []byte, l int) []byte { 314 switch { 315 case l < Off1: 316 return append(b, byte(l)) 317 case l <= 0xff: 318 return append(b, Off1, byte(l)) 319 case l <= 0xffff: 320 return append(b, Off2, byte(l>>8), byte(l)) 321 case l <= 0xffff_ffff: 322 return append(b, Off4, byte(l>>24), byte(l>>16), byte(l>>8), byte(l)) 323 default: 324 return append(b, Off8, byte(l>>56), byte(l>>48), byte(l>>40), byte(l>>32), byte(l>>24), byte(l>>16), byte(l>>8), byte(l)) 325 } 326 } 327 328 func dpr(format string, args ...interface{}) { 329 _, _ = fmt.Fprintf(os.Stderr, format, args...) 330 }