github.com/nikandfor/tlog@v0.21.5-0.20231108111739-3ef89426a96d/tlz/decoder.go (about)

     1  package tlz
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  
     7  	"github.com/nikandfor/errors"
     8  	"github.com/nikandfor/hacked/hfmt"
     9  
    10  	"github.com/nikandfor/tlog/low"
    11  )
    12  
    13  type (
    14  	Decoder struct {
    15  		io.Reader
    16  
    17  		// output
    18  		block []byte
    19  		mask  int
    20  		pos   int64 // output stream pos
    21  
    22  		// current tag
    23  		state    byte
    24  		off, len int
    25  
    26  		// input
    27  		b    []byte
    28  		i    int
    29  		boff int64 // input stream offset to b[0]
    30  	}
    31  
    32  	Dumper struct {
    33  		io.Writer
    34  
    35  		d Decoder
    36  
    37  		GlobalOffset int64
    38  
    39  		b low.Buf
    40  	}
    41  )
    42  
// eUnexpectedEOF is an internal sentinel returned by the parsing helpers
// when the buffered input ends in the middle of a tag. Read reacts to it
// by fetching more input via more().
var eUnexpectedEOF = errors.NewNoCaller("need more")
    44  
    45  func NewDecoder(r io.Reader) *Decoder {
    46  	return &Decoder{
    47  		Reader: r,
    48  	}
    49  }
    50  
    51  func NewDecoderBytes(b []byte) *Decoder {
    52  	return &Decoder{
    53  		b: b,
    54  	}
    55  }
    56  
    57  func (d *Decoder) Reset(rd io.Reader) {
    58  	d.ResetBytes(nil)
    59  	d.Reader = rd
    60  }
    61  
    62  func (d *Decoder) ResetBytes(b []byte) {
    63  	d.Reader = nil
    64  
    65  	if b != nil {
    66  		d.b = b
    67  	}
    68  
    69  	d.i = 0
    70  	d.b = d.b[:len(b)]
    71  	d.boff = 0
    72  
    73  	d.state = 0
    74  }
    75  
    76  func (d *Decoder) Read(p []byte) (n int, err error) {
    77  	var m, i int
    78  
    79  	for n < len(p) && err == nil {
    80  		m, i, err = d.read(p[n:], d.i)
    81  
    82  		n += m
    83  		d.i = i
    84  
    85  		if n == len(p) {
    86  			err = nil
    87  			break
    88  		}
    89  
    90  		if err != eUnexpectedEOF { //nolint:errorlint
    91  			continue
    92  		}
    93  
    94  		err = d.more()
    95  		if errors.Is(err, io.EOF) && (d.state != 0 || d.i < len(d.b)) {
    96  			err = io.ErrUnexpectedEOF
    97  		}
    98  	}
    99  
   100  	return n, err
   101  }
   102  
   103  func (d *Decoder) read(p []byte, st int) (n, i int, err error) {
   104  	//	defer func() { println("eazy.Decoder.read", st, i, n, err, len(d.b)) }()
   105  	if d.state != 0 && len(d.block) == 0 {
   106  		return 0, st, errors.New("missed meta")
   107  	}
   108  
   109  	i = st
   110  
   111  	for d.state == 0 {
   112  		i, err = d.readTag(i)
   113  		if err != nil {
   114  			return
   115  		}
   116  	}
   117  
   118  	if d.state == 'l' && i == len(d.b) {
   119  		return 0, i, eUnexpectedEOF
   120  	}
   121  
   122  	end := d.len
   123  	if end > len(p) {
   124  		end = len(p)
   125  	}
   126  
   127  	if d.state == 'l' {
   128  		end = copy(p[:end], d.b[i:])
   129  		i += end
   130  	} else {
   131  		end = copy(p[:end], d.block[d.off&d.mask:])
   132  		d.off += end
   133  	}
   134  
   135  	d.len -= end
   136  
   137  	for n < end {
   138  		m := copy(d.block[int(d.pos)&d.mask:], p[n:end])
   139  		n += m
   140  		d.pos += int64(m)
   141  	}
   142  
   143  	if d.len == 0 {
   144  		d.state = 0
   145  	}
   146  
   147  	return
   148  }
   149  
   150  func (d *Decoder) readTag(st int) (i int, err error) {
   151  	tag, l, i, err := d.tag(d.b, st)
   152  	if err != nil {
   153  		return st, err
   154  	}
   155  
   156  	//	println("readTag", tag, l, st, i, d.i, len(d.b))
   157  
   158  	if tag == Literal && l == Meta {
   159  		return d.continueMetaTag(i)
   160  	}
   161  
   162  	switch tag {
   163  	case Literal:
   164  		d.state = 'l'
   165  		d.len = l
   166  	case Copy:
   167  		d.off, i, err = d.roff(d.b, i)
   168  		if err != nil {
   169  			return st, err
   170  		}
   171  
   172  		d.off = int(d.pos) - d.off - l
   173  
   174  		d.state = 'c'
   175  		d.len = l
   176  	default:
   177  		return st, errors.New("unsupported tag: %x", tag)
   178  	}
   179  
   180  	return i, nil
   181  }
   182  
   183  func (d *Decoder) continueMetaTag(st int) (i int, err error) {
   184  	i = st
   185  	st--
   186  
   187  	if i == len(d.b) {
   188  		return st, eUnexpectedEOF
   189  	}
   190  
   191  	{ // legacy fallback
   192  		const legacy = "\x00\x03tlz\x00\x13000\x00\x20"
   193  
   194  		if len(d.b) < len(legacy)+1 && bytes.Equal(d.b, []byte(legacy)[:len(d.b)]) {
   195  			return st, eUnexpectedEOF
   196  		}
   197  
   198  		if bytes.Equal(d.b[:len(legacy)], []byte(legacy)) {
   199  			i = len(legacy)
   200  
   201  			bs := int(d.b[i])
   202  			i++
   203  
   204  			bs = 1 << bs
   205  
   206  			if cap(d.block) >= bs {
   207  				d.block = d.block[:bs]
   208  
   209  				for i := 0; i < bs; {
   210  					i += copy(d.block[i:], zeros)
   211  				}
   212  			} else {
   213  				d.block = make([]byte, bs)
   214  			}
   215  
   216  			d.pos = 0
   217  			d.mask = bs - 1
   218  
   219  			d.state = 0
   220  
   221  			return i, nil
   222  		}
   223  	}
   224  
   225  	meta := d.b[i]
   226  	i++
   227  
   228  	l := int(meta &^ MetaTagMask)
   229  
   230  	if l == 7 {
   231  		if i == len(d.b) {
   232  			return st, eUnexpectedEOF
   233  		}
   234  
   235  		l = int(d.b[i])
   236  		i++
   237  	} else {
   238  		l = 1 << l
   239  	}
   240  
   241  	//	println("meta", st-1, i, meta, l, i+l, len(d.b))
   242  
   243  	if i+l > len(d.b) {
   244  		return st, eUnexpectedEOF
   245  	}
   246  
   247  	switch meta & MetaTagMask {
   248  	case MetaMagic:
   249  		if !bytes.Equal(d.b[i:i+l], []byte("eazy")) {
   250  			return st, errors.New("bad magic")
   251  		}
   252  	case MetaReset:
   253  		bs := int(d.b[i])
   254  		bs = 1 << bs
   255  
   256  		if cap(d.block) >= bs {
   257  			d.block = d.block[:bs]
   258  
   259  			for i := 0; i < bs; {
   260  				i += copy(d.block[i:], zeros)
   261  			}
   262  		} else {
   263  			d.block = make([]byte, bs)
   264  		}
   265  
   266  		d.pos = 0
   267  		d.mask = bs - 1
   268  
   269  		d.state = 0
   270  	default:
   271  		return st, errors.New("unsupported meta: %x", meta)
   272  	}
   273  
   274  	i += l
   275  
   276  	return i, nil
   277  }
   278  
   279  func (d *Decoder) roff(b []byte, st int) (off, i int, err error) {
   280  	if st >= len(b) {
   281  		return 0, st, eUnexpectedEOF
   282  	}
   283  
   284  	i = st
   285  
   286  	off = int(b[i])
   287  	i++
   288  
   289  	switch off {
   290  	case Off1:
   291  		if i+1 > len(b) {
   292  			return off, st, eUnexpectedEOF
   293  		}
   294  
   295  		off = int(b[i])
   296  		i++
   297  	case Off2:
   298  		if i+2 > len(b) {
   299  			return off, st, eUnexpectedEOF
   300  		}
   301  
   302  		off = int(b[i])<<8 | int(b[i+1])
   303  		i += 2
   304  	case Off4:
   305  		if i+4 > len(b) {
   306  			return off, st, eUnexpectedEOF
   307  		}
   308  
   309  		off = int(b[i])<<24 | int(b[i+1])<<16 | int(b[i+2])<<8 | int(b[i+3])
   310  		i += 4
   311  	case Off8:
   312  		if i+8 > len(b) {
   313  			return off, st, eUnexpectedEOF
   314  		}
   315  
   316  		off = int(b[i])<<56 | int(b[i+1])<<48 | int(b[i+2])<<40 | int(b[i+3])<<32 |
   317  			int(b[i+4])<<24 | int(b[i+5])<<16 | int(b[i+6])<<8 | int(b[i+7])
   318  		i += 8
   319  	default:
   320  		// off is embedded
   321  	}
   322  
   323  	return off, i, nil
   324  }
   325  
   326  func (d *Decoder) tag(b []byte, st int) (tag, l, i int, err error) {
   327  	if st >= len(b) {
   328  		return 0, 0, st, eUnexpectedEOF
   329  	}
   330  
   331  	i = st
   332  
   333  	tag = int(b[i]) & TagMask
   334  	l = int(b[i]) & TagLenMask
   335  	i++
   336  
   337  	switch l {
   338  	case Len1:
   339  		if i+1 > len(b) {
   340  			return tag, l, st, eUnexpectedEOF
   341  		}
   342  
   343  		l = int(b[i])
   344  		i++
   345  	case Len2:
   346  		if i+2 > len(b) {
   347  			return tag, l, st, eUnexpectedEOF
   348  		}
   349  
   350  		l = int(b[i])<<8 | int(b[i+1])
   351  		i += 2
   352  	case Len4:
   353  		if i+4 > len(b) {
   354  			return tag, l, st, eUnexpectedEOF
   355  		}
   356  
   357  		l = int(b[i])<<24 | int(b[i+1])<<16 | int(b[i+2])<<8 | int(b[i+3])
   358  		i += 4
   359  	case Len8:
   360  		if i+8 > len(b) {
   361  			return tag, l, st, eUnexpectedEOF
   362  		}
   363  
   364  		l = int(b[i])<<56 | int(b[i+1])<<48 | int(b[i+2])<<40 | int(b[i+3])<<32 |
   365  			int(b[i+4])<<24 | int(b[i+5])<<16 | int(b[i+6])<<8 | int(b[i+7])
   366  		i += 8
   367  	default:
   368  		// l is embedded
   369  	}
   370  
   371  	return tag, l, i, nil
   372  }
   373  
   374  func (d *Decoder) more() (err error) {
   375  	if d.Reader == nil {
   376  		return io.EOF
   377  	}
   378  
   379  	copy(d.b, d.b[d.i:])
   380  	d.b = d.b[:len(d.b)-d.i]
   381  	d.boff += int64(d.i)
   382  	d.i = 0
   383  
   384  	end := len(d.b)
   385  
   386  	if len(d.b) == 0 {
   387  		d.b = make([]byte, 1024)
   388  	} else {
   389  		d.b = append(d.b, 0, 0, 0, 0, 0, 0, 0, 0)
   390  	}
   391  
   392  	d.b = d.b[:cap(d.b)]
   393  
   394  	n, err := d.Reader.Read(d.b[end:])
   395  	//	println("more", d.i, end, end+n, n, len(d.b))
   396  	d.b = d.b[:end+n]
   397  
   398  	if n != 0 && errors.Is(err, io.EOF) {
   399  		err = nil
   400  	}
   401  
   402  	return err
   403  }
   404  
   405  func Dump(p []byte) string {
   406  	var d Dumper
   407  
   408  	_, err := d.Write(p)
   409  	if err != nil {
   410  		return err.Error()
   411  	}
   412  
   413  	return string(d.b)
   414  }
   415  
   416  func NewDumper(w io.Writer) *Dumper {
   417  	return &Dumper{
   418  		Writer: w,
   419  	}
   420  }
   421  
   422  func (w *Dumper) Write(p []byte) (i int, err error) {
   423  	w.b = w.b[:0]
   424  
   425  	var tag, l int
   426  
   427  	for i < len(p) {
   428  		if w.GlobalOffset >= 0 {
   429  			w.b = hfmt.Appendf(w.b, "%6x  ", int(w.GlobalOffset)+i)
   430  		}
   431  
   432  		w.b = hfmt.Appendf(w.b, "%4x  ", i)
   433  
   434  		w.b = hfmt.Appendf(w.b, "%6x  ", w.d.pos)
   435  
   436  		st := i
   437  
   438  		tag, l, i, err = w.d.tag(p, i)
   439  		if err != nil {
   440  			return st, err
   441  		}
   442  
   443  		//	println("loop", i, tag>>7, l)
   444  
   445  		switch {
   446  		case l == Meta:
   447  			if i == len(p) {
   448  				return st, eUnexpectedEOF
   449  			}
   450  
   451  			tag = int(p[i])
   452  			i++
   453  
   454  			l = tag &^ MetaTagMask
   455  
   456  			if l == 7 {
   457  				if i == len(p) {
   458  					return st, eUnexpectedEOF
   459  				}
   460  
   461  				l = int(p[i])
   462  				i++
   463  			} else {
   464  				l = 1 << l
   465  			}
   466  
   467  			w.b = hfmt.Appendf(w.b, "meta %2x %x  %q\n", tag>>3, l, p[i:i+l])
   468  
   469  			i += l
   470  		case tag == Literal:
   471  			w.b = hfmt.Appendf(w.b, "lit  %4x        %q\n", l, p[i:i+l])
   472  
   473  			i += l
   474  			w.d.pos += int64(l)
   475  		case tag == Copy:
   476  			var off int
   477  
   478  			off, i, err = w.d.roff(p, i)
   479  			if err != nil {
   480  				return st, err
   481  			}
   482  
   483  			w.d.pos += int64(l)
   484  
   485  			w.b = hfmt.Appendf(w.b, "copy %4x  off %4x (%4x)\n", l, off, off+l)
   486  
   487  		//	off += l
   488  		default:
   489  			panic(tag)
   490  		}
   491  	}
   492  
   493  	w.GlobalOffset += int64(i)
   494  
   495  	if w.Writer != nil {
   496  		_, err = w.Writer.Write(w.b)
   497  	}
   498  
   499  	return i, err
   500  }