github.com/nikandfor/tlog@v0.21.5-0.20231108111739-3ef89426a96d/tlz/encoder.go (about)

     1  package tlz
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"os"
     7  	"unsafe"
     8  )
     9  
    10  type (
    11  	Encoder struct {
    12  		io.Writer
    13  
    14  		b       []byte
    15  		written int64
    16  
    17  		block []byte
    18  		mask  int
    19  		pos   int64
    20  
    21  		ht  []uint32
    22  		hsh uint
    23  	}
    24  )
    25  
    26  // Byte multipliers.
    27  const (
    28  	B = 1 << (iota * 10)
    29  	KiB
    30  	MiB
    31  	GiB
    32  )
    33  
    34  // Tags.
    35  const (
    36  	Literal = iota << 7
    37  	Copy
    38  
    39  	TagMask    = 0b1000_0000
    40  	TagLenMask = 0b0111_1111
    41  )
    42  
    43  // Tag lengths.
    44  const (
    45  	_ = 1<<7 - iota
    46  	Len8
    47  	Len4
    48  	Len2
    49  	Len1
    50  
    51  	Meta = 0 // Literal | Meta - means meta tag
    52  )
    53  
    54  // Offset lengths.
    55  const (
    56  	_ = 1<<8 - iota
    57  	Off8
    58  	Off4
    59  	Off2
    60  	Off1
    61  )
    62  
    63  // Meta tags.
    64  const (
    65  	// len: 1 2 4 8  16 32 64 Len1
    66  
    67  	MetaMagic = iota << 3 // 4: "tlz" Version
    68  	MetaReset             // 1: block_size_log
    69  
    70  	MetaTagMask = 0b1111_1000
    71  )
    72  
// FileMagic is the byte sequence an encoded stream begins with.
// It matches the first six bytes appendHeader emits:
// Literal|Meta, MetaMagic|2 and the four bytes "eazy".
const FileMagic = "\x00\x02eazy"
    74  
    75  var zeros = make([]byte, 1024)
    76  
    77  func NewEncoder(w io.Writer, bs int) *Encoder {
    78  	if bs&(bs-1) != 0 || bs < 256 {
    79  		panic("block size must be power of two and at least 1KB")
    80  	}
    81  
    82  	return NewEncoderHTSize(w, bs, bs>>6)
    83  }
    84  
    85  func newEncoder(w io.Writer, bs, ss int) *Encoder {
    86  	return NewEncoderHTSize(w, bs, bs>>ss)
    87  }
    88  
    89  func NewEncoderHTSize(w io.Writer, bs, hlen int) *Encoder {
    90  	if (bs-1)&bs != 0 {
    91  		panic("block size must be power of two and at least 1KB")
    92  	}
    93  
    94  	if (hlen-1)&hlen != 0 {
    95  		panic("hash table size must be power of two")
    96  	}
    97  
    98  	hsh := uint(2)
    99  	for 1<<(32-hsh) != hlen {
   100  		hsh++
   101  	}
   102  
   103  	return &Encoder{
   104  		Writer: w,
   105  		block:  make([]byte, bs),
   106  		mask:   bs - 1,
   107  		ht:     make([]uint32, hlen),
   108  		hsh:    hsh,
   109  	}
   110  }
   111  
   112  func (w *Encoder) Reset(wr io.Writer) {
   113  	w.Writer = wr
   114  
   115  	w.reset()
   116  }
   117  
   118  func (w *Encoder) reset() {
   119  	w.pos = 0
   120  	for i := 0; i < len(w.block); {
   121  		i += copy(w.block[i:], zeros)
   122  	}
   123  	for i := range w.ht {
   124  		w.ht[i] = 0
   125  	}
   126  }
   127  
   128  // Write is io.Writer implementation.
   129  func (w *Encoder) Write(p []byte) (done int, err error) { //nolint:gocognit
   130  	w.b = w.b[:0]
   131  
   132  	if w.pos == 0 {
   133  		w.b = w.appendHeader(w.b)
   134  	}
   135  
   136  	start := int(w.pos)
   137  
   138  	for i := 0; i+4 < len(p); {
   139  		h := *(*uint32)(unsafe.Pointer(&p[i])) * 0x1e35a7bd >> w.hsh
   140  
   141  		pos := int(w.ht[h])
   142  		w.ht[h] = uint32(start + i)
   143  
   144  		if off := int(w.pos) - pos; off <= i-done+4 || off >= len(w.block) {
   145  			i++
   146  			continue
   147  		}
   148  
   149  		// extend backward
   150  
   151  		ist := i - 1
   152  		st := pos - 1
   153  
   154  		for ist >= done && p[ist] == w.block[st&w.mask] {
   155  			ist--
   156  			st--
   157  		}
   158  
   159  		ist++
   160  		st++
   161  
   162  		// extend forward
   163  
   164  		iend := i
   165  		end := pos
   166  
   167  		for iend < len(p) && p[iend] == w.block[end&w.mask] {
   168  			iend++
   169  			end++
   170  		}
   171  
   172  		if end-st <= 4 {
   173  			i++
   174  			continue
   175  		}
   176  
   177  		off := start + i - pos
   178  		lit := ist - done
   179  		cst := st + off
   180  		cend := end + off
   181  
   182  		if x := cend - len(w.block) - st; x > 0 {
   183  			//	dpr("block long  intersection: reduce end by %4x\n", x)
   184  			end -= x
   185  			iend -= x
   186  		}
   187  
   188  		if x := end - cst + lit; x > 0 {
   189  			//	dpr("literal     intersection: reduce end by %4x\n", x)
   190  			end -= x
   191  			iend -= x
   192  
   193  			/*
   194  				j := done
   195  				for iend < len(p) && j < ist && p[iend] == p[j] && end < cst && cend < st+len(w.block) {
   196  					iend++
   197  					cend++
   198  					end++
   199  					j++
   200  				}
   201  
   202  				dpr("literal     intersection: added back %4x\n", j-done)
   203  			*/
   204  		}
   205  
   206  		if end-st <= 4 {
   207  			i++
   208  			continue
   209  		}
   210  
   211  		cend = end + off
   212  
   213  		/*
   214  			dpr(""+
   215  				"lit %4x %4x (%4x)  pos %6x %6x  blk %4x %4x  %q\n"+
   216  				"cpy %4x %4x (%4x)  pos %6x %6x  blk %4x %4x  %q\n"+
   217  				"i   %4x pos %6x   bck %6x %6x  blk %4x %4x  off %4x  st %4x end %4x\n",
   218  				done, ist, lit, cst-lit, cst, (cst-lit)&w.mask, cst&w.mask, p[done:ist],
   219  				ist, iend, iend-ist, cst, cend, cst&w.mask, cend&w.mask, p[ist:iend],
   220  				i, pos, st, end, st&w.mask, end&w.mask, off, st-pos, end-pos,
   221  			)
   222  		*/
   223  
   224  		if !(st&w.mask >= cend&w.mask || cst&w.mask >= end&w.mask) {
   225  			panic(pos)
   226  		}
   227  
   228  		if done < ist {
   229  			w.appendLiteral(p, done, ist)
   230  		}
   231  
   232  		w.appendCopy(st, end)
   233  
   234  		h = *(*uint32)(unsafe.Pointer(&p[i+1])) * 0x1e35a7bd >> w.hsh
   235  		w.ht[h] = uint32(start + i + 1)
   236  
   237  		i = iend
   238  		done = iend
   239  	}
   240  
   241  	if done < len(p) {
   242  		w.appendLiteral(p, done, len(p))
   243  
   244  		done = len(p)
   245  	}
   246  
   247  	n, err := w.Writer.Write(w.b)
   248  	w.written += int64(n)
   249  
   250  	if err != nil || n != len(w.b) {
   251  		w.reset()
   252  	}
   253  
   254  	return done, err
   255  }
   256  
   257  func (w *Encoder) appendHeader(b []byte) []byte {
   258  	b = append(b, Literal|Meta, MetaMagic|2, 'e', 'a', 'z', 'y')
   259  
   260  	bs := 0
   261  	for q := len(w.block); q != 1; q >>= 1 {
   262  		bs++
   263  	}
   264  
   265  	b = append(b, Literal|Meta, MetaReset|0, byte(bs))
   266  
   267  	return b
   268  }
   269  
   270  func (w *Encoder) appendLiteral(d []byte, s, e int) {
   271  	w.b = w.appendTag(w.b, Literal, e-s)
   272  	w.b = append(w.b, d[s:e]...)
   273  
   274  	for s < e {
   275  		n := copy(w.block[int(w.pos)&w.mask:], d[s:e])
   276  		s += n
   277  		w.pos += int64(n)
   278  	}
   279  }
   280  
   281  func (w *Encoder) appendCopy(st, end int) {
   282  	w.b = w.appendTag(w.b, Copy, end-st)
   283  	w.b = w.appendOff(w.b, int(w.pos)-end)
   284  
   285  	var n int
   286  	for st < end {
   287  		limit := len(w.block)
   288  		if st&w.mask < end&w.mask {
   289  			limit = end & w.mask
   290  		}
   291  
   292  		n = copy(w.block[int(w.pos)&w.mask:], w.block[st&w.mask:limit])
   293  		st += n
   294  		w.pos += int64(n)
   295  	}
   296  }
   297  
   298  func (w *Encoder) appendTag(b []byte, tag byte, l int) []byte {
   299  	switch {
   300  	case l < Len1:
   301  		return append(b, tag|byte(l))
   302  	case l <= 0xff:
   303  		return append(b, tag|Len1, byte(l))
   304  	case l <= 0xffff:
   305  		return append(b, tag|Len2, byte(l>>8), byte(l))
   306  	case l <= 0xffff_ffff:
   307  		return append(b, tag|Len4, byte(l>>24), byte(l>>16), byte(l>>8), byte(l))
   308  	default:
   309  		return append(b, tag|Len8, byte(l>>56), byte(l>>48), byte(l>>40), byte(l>>32), byte(l>>24), byte(l>>16), byte(l>>8), byte(l))
   310  	}
   311  }
   312  
   313  func (w *Encoder) appendOff(b []byte, l int) []byte {
   314  	switch {
   315  	case l < Off1:
   316  		return append(b, byte(l))
   317  	case l <= 0xff:
   318  		return append(b, Off1, byte(l))
   319  	case l <= 0xffff:
   320  		return append(b, Off2, byte(l>>8), byte(l))
   321  	case l <= 0xffff_ffff:
   322  		return append(b, Off4, byte(l>>24), byte(l>>16), byte(l>>8), byte(l))
   323  	default:
   324  		return append(b, Off8, byte(l>>56), byte(l>>48), byte(l>>40), byte(l>>32), byte(l>>24), byte(l>>16), byte(l>>8), byte(l))
   325  	}
   326  }
   327  
   328  func dpr(format string, args ...interface{}) {
   329  	_, _ = fmt.Fprintf(os.Stderr, format, args...)
   330  }