github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/parsed_serialize.go

/*
 * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package simdjson

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math"
	"runtime"
	"sync"
	"unsafe"

	"github.com/klauspost/compress/s2"
	"github.com/klauspost/compress/zstd"
)

const (
	stringBits        = 14
	stringSize        = 1 << stringBits
	stringmask        = stringSize - 1
	serializedVersion = 3
)

// Serializer allows serializing parsed JSON and reading it back.
// A Serializer can be reused, but must not be used concurrently.
type Serializer struct {
	// Compressed strings
	sMsg []byte

	// Uncompressed tags
	tagsBuf []byte
	// Values
	valuesBuf     []byte
	valuesCompBuf []byte
	tagsCompBuf   []byte

	compValues, compTags uint8
	compStrings          uint8
	fasterComp           bool

	// Deduplicated strings
	stringWr     io.Writer
	stringsTable [stringSize]uint32
	stringBuf    []byte

	maxBlockSize uint64
}

// NewSerializer will create and initialize a Serializer.
func NewSerializer() *Serializer {
	initSerializerOnce.Do(initSerializer)
	var s Serializer
	s.CompressMode(CompressDefault)
	s.maxBlockSize = 1 << 31
	return &s
}

type CompressMode uint8

const (
	// CompressNone applies no compression whatsoever.
	CompressNone CompressMode = iota

	// CompressFast will apply light compression,
	// but will not deduplicate strings, which may affect deserialization speed.
	CompressFast

	// CompressDefault applies light compression and deduplicates strings.
	CompressDefault

	// CompressBest applies the strongest available compression to all blocks
	// and deduplicates strings.
	CompressBest
)

// CompressMode sets the compression mode used for subsequent serialization.
func (s *Serializer) CompressMode(c CompressMode) {
	switch c {
	case CompressNone:
		s.compValues = blockTypeUncompressed
		s.compTags = blockTypeUncompressed
		s.compStrings = blockTypeUncompressed
	case CompressFast:
		s.compValues = blockTypeS2
		s.compTags = blockTypeS2
		s.compStrings = blockTypeS2
		s.fasterComp = true
	case CompressDefault:
		s.compValues = blockTypeS2
		s.compTags = blockTypeZstd
		s.compStrings = blockTypeS2
	case CompressBest:
		s.compValues = blockTypeZstd
		s.compTags = blockTypeZstd
		s.compStrings = blockTypeZstd
	default:
		panic("unknown compression mode")
	}
}
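
// exampleRoundTrip is a usage sketch, not part of the original file: it
// parses a document, serializes it and reads it back. It assumes Parse is
// this package's top-level parser (which can fail on unsupported CPUs).
func exampleRoundTrip(input []byte) (*ParsedJson, error) {
	pj, err := Parse(input, nil)
	if err != nil {
		return nil, err
	}
	s := NewSerializer()
	s.CompressMode(CompressBest)
	// Serialize appends to an optional destination and returns it.
	blob := s.Serialize(nil, *pj)
	// Deserialize reads the blob back, reusing an optional destination.
	return s.Deserialize(blob, nil)
}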

func serializeNDStream(dst io.Writer, in <-chan Stream, reuse chan<- *ParsedJson, concurrency int, comp CompressMode) error {
	if concurrency <= 0 {
		concurrency = (runtime.GOMAXPROCS(0) + 1) / 2
	}
	var wg sync.WaitGroup
	wg.Add(concurrency)
	type workload struct {
		pj  *ParsedJson
		dst chan []byte
	}
	var readCh = make(chan workload, concurrency)
	var writeCh = make(chan chan []byte, concurrency)
	dstPool := sync.Pool{
		New: func() interface{} {
			return make([]byte, 0, 64<<10)
		},
	}
	for i := 0; i < concurrency; i++ {
		go func() {
			s := NewSerializer()
			s.CompressMode(comp)
			defer wg.Done()
			for input := range readCh {
				res := s.Serialize(dstPool.Get().([]byte)[:0], *input.pj)
				input.dst <- res
				select {
				case reuse <- input.pj:
				default:
				}
			}
		}()
	}
	var writeErr error
	var wwg sync.WaitGroup
	wwg.Add(1)
	go func() {
		defer wwg.Done()
		for block := range writeCh {
			b := <-block
			var n int
			n, writeErr = dst.Write(b)
			if n != len(b) {
				writeErr = io.ErrShortWrite
			}
			// Return the serialization buffer to the pool.
			dstPool.Put(b[:0])
		}
	}()
	var readErr error
	var rwg sync.WaitGroup
	rwg.Add(1)
	go func() {
		defer rwg.Done()
		defer close(readCh)
		for block := range in {
			if block.Error != nil {
				readErr = block.Error
				continue
			}
			// Buffer one result so a worker finishing out of order
			// never blocks on delivery.
			wl := workload{
				pj:  block.Value,
				dst: make(chan []byte, 1),
			}
			// Hand the result channel to the writer before queuing the
			// work, so blocks are written in input order.
			writeCh <- wl.dst
			readCh <- wl
		}
	}()
	rwg.Wait()
	if readErr != nil {
		wg.Wait()
		close(writeCh)
		wwg.Wait()
		return readErr
	}
	// Read done, wait for workers...
	wg.Wait()
	close(writeCh)
	// Wait for writer...
	wwg.Wait()
	return writeErr
}
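
// exampleSerializeND sketches, for illustration only, how serializeNDStream
// could be fed from the package's newline-delimited stream parser; the
// ParseNDStream wiring is an assumption based on this package's stream API.
func exampleSerializeND(r io.Reader, w io.Writer) error {
	in := make(chan Stream, 4)
	reuse := make(chan *ParsedJson, 4)
	go ParseNDStream(r, in, reuse)
	// Concurrency <= 0 selects roughly half of GOMAXPROCS.
	return serializeNDStream(w, in, reuse, 0, CompressDefault)
}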

const (
	tagFloatWithFlag = Tag('e')
)

// Serialize the data in pj and return the data.
// An optional destination can be provided.
func (s *Serializer) Serialize(dst []byte, pj ParsedJson) []byte {
	// Blocks:
	//  - Compressed size of entire block following. Can be 0 if empty. (varuint)
	//  - Block type, byte:
	//     0: uncompressed, rest is data.
	//     1: S2 compressed stream.
	//     2: Zstd block.
	//  - Compressed data.
	//
	// Serialized format:
	// - Header: Version (byte)
	// - Compressed size of remaining data (varuint). Excludes previous and size of this.
	// - Tape size, uncompressed (varuint)
	// - Strings size, uncompressed (varuint)
	// - Strings Block: Compressed block. See above.
	// - Message size, uncompressed (varuint)
	// - Message Block: Compressed block. See above.
	// - Uncompressed size of tags (varuint)
	// - Tags Block: Compressed block. See above.
	// - Uncompressed values size (varuint)
	// - Values Block: Compressed block. See above.
	//
	// Reconstruction:
	//
	// Read the next tag. Depending on the tag, read a number of values:
	// Values:
	//   - Null, BoolTrue/BoolFalse: No value.
	//   - Nop: No value; skip distances are reconstructed.
	//   - TagObjectStart, TagArrayStart, TagRoot: (Offset - Current offset). Write end tag for object and array.
	//   - TagObjectEnd, TagArrayEnd: No value stored, derived from start.
	//   - TagInteger, TagUint, TagFloat: 64 bits.
	//   - TagString: offset and length stored.
	//   - tagFloatWithFlag (v2): Contains the float parsing flag.
	//
	// If any tags or values are left over when the tape has been
	// reconstructed, the input is considered invalid.

	var wg sync.WaitGroup

	// Reset lookup table.
	// Stored offsets are biased by 1, so 0 indicates an unfilled entry.
	for i := range s.stringsTable[:] {
		s.stringsTable[i] = 0
	}
	if len(s.stringBuf) > 0 {
		s.stringBuf = s.stringBuf[:0]
	}
	if len(s.sMsg) > 0 {
		s.sMsg = s.sMsg[:0]
	}

	msgWr, msgDone := encBlock(s.compStrings, s.sMsg, s.fasterComp)
	s.stringWr = msgWr

	const tagBufSize = 64 << 10
	const valBufSize = 64 << 10

	valWr, valDone := encBlock(s.compValues, s.valuesCompBuf, s.fasterComp)
	tagWr, tagDone := encBlock(s.compTags, s.tagsCompBuf, s.fasterComp)
	// Pessimistically allocate for maximum possible size.
	if cap(s.tagsBuf) <= tagBufSize {
		s.tagsBuf = make([]byte, tagBufSize)
	}
	s.tagsBuf = s.tagsBuf[:tagBufSize]

	// At most one value per 2 tape entries
	if cap(s.valuesBuf) < valBufSize+4 {
		s.valuesBuf = make([]byte, valBufSize+4)
	}

	s.valuesBuf = s.valuesBuf[:0]
	off := 0
	tagsOff := 0
	var tmp [8]byte
	rawValues := 0
	rawTags := 0
	for off < len(pj.Tape) {
		if tagsOff >= tagBufSize {
			rawTags += tagsOff
			tagWr.Write(s.tagsBuf[:tagsOff])
			tagsOff = 0
		}
		if len(s.valuesBuf) >= valBufSize {
			rawValues += len(s.valuesBuf)
			valWr.Write(s.valuesBuf)
			s.valuesBuf = s.valuesBuf[:0]
		}
		entry := pj.Tape[off]
		ntype := Tag(entry >> 56)
		payload := entry & JSONVALUEMASK

		switch ntype {
		case TagNop:
			// We recreate the skip count when we unmarshal.
		case TagString:
			sb, err := pj.stringByteAt(payload, pj.Tape[off+1])
			if err != nil {
				panic(err)
			}
			offset := s.indexString(sb)

			binary.LittleEndian.PutUint64(tmp[:], offset)
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			binary.LittleEndian.PutUint64(tmp[:], uint64(len(sb)))
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagUint:
			binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagInteger:
			binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagFloat:
			if payload == 0 {
				binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				off++
			} else {
				ntype = tagFloatWithFlag
				binary.LittleEndian.PutUint64(tmp[:], entry)
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				off++
			}
		case TagNull, TagBoolTrue, TagBoolFalse:
			// No value.
		case TagObjectStart, TagArrayStart, TagRoot:
			// TagObjectStart and TagArrayStart always point forward.
			// TagRoot can point in either direction, so we rely on under/overflow.
			binary.LittleEndian.PutUint64(tmp[:], payload-uint64(off))
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
		case TagObjectEnd, TagArrayEnd, TagEnd:
			// No value stored; deduced from the matching start tag.
		default:
			wg.Wait()
			panic(fmt.Errorf("unknown tag: %d", int(ntype)))
		}
		s.tagsBuf[tagsOff] = uint8(ntype)
		tagsOff++
		off++
	}
	if tagsOff > 0 {
		rawTags += tagsOff
		tagWr.Write(s.tagsBuf[:tagsOff])
	}
	if len(s.valuesBuf) > 0 {
		rawValues += len(s.valuesBuf)
		valWr.Write(s.valuesBuf)
	}
	wg.Add(3)
	go func() {
		var err error
		s.tagsCompBuf, err = tagDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()
	go func() {
		var err error
		s.valuesCompBuf, err = valDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()
	go func() {
		var err error
		s.sMsg, err = msgDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()

	// Wait for compressors
	wg.Wait()

	// Version
	dst = append(dst, serializedVersion)

	// Size of varints...
	varInts := binary.PutUvarint(tmp[:], uint64(0)) +
		binary.PutUvarint(tmp[:], uint64(len(s.sMsg))) +
		binary.PutUvarint(tmp[:], uint64(rawTags)) +
		binary.PutUvarint(tmp[:], uint64(len(s.tagsCompBuf))) +
		binary.PutUvarint(tmp[:], uint64(rawValues)) +
		binary.PutUvarint(tmp[:], uint64(len(s.valuesCompBuf))) +
		binary.PutUvarint(tmp[:], uint64(len(s.stringBuf))) +
		binary.PutUvarint(tmp[:], uint64(len(pj.Tape)))

	n := binary.PutUvarint(tmp[:], uint64(1+len(s.sMsg)+len(s.tagsCompBuf)+len(s.valuesCompBuf)+varInts))
	dst = append(dst, tmp[:n]...)

	// Tape elements, uncompressed.
	n = binary.PutUvarint(tmp[:], uint64(len(pj.Tape)))
	dst = append(dst, tmp[:n]...)

	// Strings uncompressed size. Always 0; the deduplicated strings
	// are carried in the message block below.
	dst = append(dst, 0)
	// Strings block (empty).
	dst = append(dst, 0)

	// Messages uncompressed size
	n = binary.PutUvarint(tmp[:], uint64(len(s.stringBuf)))
	dst = append(dst, tmp[:n]...)
	// Message
	n = binary.PutUvarint(tmp[:], uint64(len(s.sMsg)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.sMsg...)

	// Tags
	n = binary.PutUvarint(tmp[:], uint64(rawTags))
	dst = append(dst, tmp[:n]...)
	n = binary.PutUvarint(tmp[:], uint64(len(s.tagsCompBuf)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.tagsCompBuf...)

	// Values
	n = binary.PutUvarint(tmp[:], uint64(rawValues))
	dst = append(dst, tmp[:n]...)
	n = binary.PutUvarint(tmp[:], uint64(len(s.valuesCompBuf)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.valuesCompBuf...)
	// Debug output; disabled by default.
	if false {
		fmt.Println("strings:", len(pj.Strings.B)+len(pj.Message), "->", len(s.sMsg), "tags:", rawTags, "->", len(s.tagsCompBuf), "values:", rawValues, "->", len(s.valuesCompBuf), "Total:", len(pj.Message)+len(pj.Strings.B)+len(pj.Tape)*8, "->", len(dst))
	}

	return dst
}
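
// exampleHeaderPeek is an illustrative sketch, not part of the original
// file: it reads the fixed front of a serialized blob, one version byte
// followed by a varuint holding the size of the remaining payload.
func exampleHeaderPeek(serialized []byte) (version byte, remaining uint64, err error) {
	br := bytes.NewBuffer(serialized)
	if version, err = br.ReadByte(); err != nil {
		return 0, 0, err
	}
	remaining, err = binary.ReadUvarint(br)
	return version, remaining, err
}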

func (s *Serializer) splitBlocks(r io.Reader, out chan []byte) error {
	br := bufio.NewReader(r)
	defer close(out)
	for {
		if v, err := br.ReadByte(); err != nil {
			return err
		} else if v == 0 || v > serializedVersion {
			return errors.New("unknown version")
		}

		// Comp size
		c, err := binary.ReadUvarint(br)
		if err != nil {
			return err
		}
		if c > s.maxBlockSize {
			return errors.New("compressed block too big")
		}
		block := make([]byte, c)
		n, err := io.ReadFull(br, block)
		if err != nil {
			return err
		}
		if n > 0 {
			out <- block
		}
	}
}

// Deserialize the content in src.
// Only basic sanity checks will be performed.
// Slight corruption will likely go through unnoticed.
// An optional destination can be provided.
func (s *Serializer) Deserialize(src []byte, dst *ParsedJson) (*ParsedJson, error) {
	br := bytes.NewBuffer(src)

	if v, err := br.ReadByte(); err != nil {
		return dst, err
	} else if v > serializedVersion {
		// Versions are backwards compatible: v3 reads v2, v2 reads v1.
		return dst, errors.New("unknown version")
	}

	if dst == nil {
		dst = &ParsedJson{}
	}

	// Comp size
	if c, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else if int(c) > br.Len() {
		return dst, fmt.Errorf("stream too short, want %d, only have %d left", c, br.Len())
	}

	// Tape size
	if ts, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(dst.Tape)) < ts {
			dst.Tape = make([]uint64, ts)
		}
		dst.Tape = dst.Tape[:ts]
	}

	// String size
	if ss, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if dst.Strings == nil || uint64(cap(dst.Strings.B)) < ss {
			dst.Strings = &TStrings{B: make([]byte, ss)}
		}
		dst.Strings.B = dst.Strings.B[:ss]
	}

	// Decompress strings
	var sWG sync.WaitGroup
	var stringsErr, msgErr error
	err := s.decBlock(br, dst.Strings.B, &sWG, &stringsErr)
	if err != nil {
		return dst, err
	}

	// Message size
	if ss, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(dst.Message)) < ss || dst.Message == nil {
			dst.Message = make([]byte, ss)
		}
		dst.Message = dst.Message[:ss]
	}

	// Messages
	err = s.decBlock(br, dst.Message, &sWG, &msgErr)
	if err != nil {
		return dst, err
	}
	defer sWG.Wait()

	// Decompress tags
	if tags, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(s.tagsBuf)) < tags {
			s.tagsBuf = make([]byte, tags)
		}
		s.tagsBuf = s.tagsBuf[:tags]
	}

	var wg sync.WaitGroup
	var tagsErr error
	err = s.decBlock(br, s.tagsBuf, &wg, &tagsErr)
	if err != nil {
		return dst, fmt.Errorf("decompressing tags: %w", err)
	}
	defer wg.Wait()

	// Decompress values
	if vals, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(s.valuesBuf)) < vals {
			s.valuesBuf = make([]byte, vals)
		}
		s.valuesBuf = s.valuesBuf[:vals]
	}

	var valsErr error
	err = s.decBlock(br, s.valuesBuf, &wg, &valsErr)
	if err != nil {
		return dst, fmt.Errorf("decompressing values: %w", err)
	}

	// Wait until we have what we need for the tape.
	wg.Wait()
	switch {
	case tagsErr != nil:
		return dst, fmt.Errorf("decompressing tags: %w", tagsErr)
	case valsErr != nil:
		return dst, fmt.Errorf("decompressing values: %w", valsErr)
	}

	// Reconstruct tape:
	var off int
	values := s.valuesBuf
	nSkips := 0
	for _, t := range s.tagsBuf {
		if off == len(dst.Tape) {
			return dst, errors.New("tags extended beyond tape")
		}
		tag := Tag(t)

		tagDst := uint64(t) << 56
		if nSkips > 0 && tag != TagNop {
			// We owe skips. Add with jumps.
			for i := 0; i < nSkips; i++ {
				dst.Tape[off] = (uint64(TagNop) << JSONTAGOFFSET) | uint64(nSkips-i)
				off++
			}
			nSkips = 0
		}
		switch tag {
		case TagNop:
			nSkips++
		case TagString:
			if len(values) < 16 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			sOffset := binary.LittleEndian.Uint64(values[:8])
			sLen := binary.LittleEndian.Uint64(values[8:16])
			values = values[16:]

			dst.Tape[off] = tagDst | sOffset
			dst.Tape[off+1] = sLen
			off += 2
		case TagFloat, TagInteger, TagUint:
			if len(values) < 8 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			dst.Tape[off] = tagDst
			dst.Tape[off+1] = binary.LittleEndian.Uint64(values[:8])
			values = values[8:]
			off += 2
		case tagFloatWithFlag:
			// The value stream contains the full tape entry, including the flag.
			if len(values) < 16 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			dst.Tape[off] = binary.LittleEndian.Uint64(values[:8])
			dst.Tape[off+1] = binary.LittleEndian.Uint64(values[8:16])
			values = values[16:]
			off += 2
		case TagNull, TagBoolTrue, TagBoolFalse, TagEnd:
			dst.Tape[off] = tagDst
			off++
		case TagObjectStart, TagArrayStart:
			if len(values) < 8 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			// Always forward
			val := binary.LittleEndian.Uint64(values[:8])
			values = values[8:]
			val += uint64(off)
			if val > uint64(len(dst.Tape)) {
				return dst, fmt.Errorf("%v extends beyond tape (%d). offset:%d", tag, len(dst.Tape), val)
			}

			dst.Tape[off] = tagDst | val
			// Write closing...
			dst.Tape[val-1] = uint64(tagOpenToClose[tag])<<56 | uint64(off)

			off++
		case TagRoot:
			if len(values) < 8 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			// Stored relative to the current offset; may point either way.
			val := binary.LittleEndian.Uint64(values[:8])
			values = values[8:]
			val += uint64(off)
			if val > uint64(len(dst.Tape)) {
				return dst, fmt.Errorf("%v extends beyond tape (%d). offset:%d", tag, len(dst.Tape), val)
			}

			dst.Tape[off] = tagDst | val

			off++
		case TagObjectEnd, TagArrayEnd:
			// This should already have been written by the matching start tag.
			if dst.Tape[off]&JSONTAGMASK != tagDst {
				return dst, fmt.Errorf("reading %v, offset:%d, start tag did not match %x != %x", tag, off, dst.Tape[off]>>56, uint8(tagDst))
			}
			off++
		default:
			return nil, fmt.Errorf("unknown tag: %v", tag)
		}
	}
	if nSkips > 0 {
		// We owe skips. Add with jumps.
		for i := 0; i < nSkips; i++ {
			dst.Tape[off] = (uint64(TagNop) << JSONTAGOFFSET) | uint64(nSkips-i)
			off++
		}
		nSkips = 0
	}
	sWG.Wait()
	if off != len(dst.Tape) {
		return dst, fmt.Errorf("tags did not fill tape, want %d, got %d", len(dst.Tape), off)
	}
	if len(values) > 0 {
		return dst, fmt.Errorf("%d bytes of unused values left after tape reconstruction", len(values))
	}
	if stringsErr != nil {
		return dst, fmt.Errorf("reading strings: %w", stringsErr)
	}
	if msgErr != nil {
		return dst, fmt.Errorf("reading message: %w", msgErr)
	}
	return dst, nil
}
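
// exampleReadBack is a usage sketch, not part of the original file: it
// deserializes a blob and walks the reconstructed tape root by root.
// Iter, Advance and TypeNone are assumed from this package's tape API.
func exampleReadBack(s *Serializer, blob []byte) error {
	pj, err := s.Deserialize(blob, nil)
	if err != nil {
		return err
	}
	i := pj.Iter()
	for i.Advance() != TypeNone {
		// Each Advance moves to the next root element.
	}
	return nil
}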

func (s *Serializer) decBlock(br *bytes.Buffer, dst []byte, wg *sync.WaitGroup, dstErr *error) error {
	size, err := binary.ReadUvarint(br)
	if err != nil {
		return err
	}
	if size > uint64(br.Len()) {
		return fmt.Errorf("block size (%d) extends beyond input %d", size, br.Len())
	}
	if size == 0 && len(dst) == 0 {
		// Empty block and no output expected; not even a compression type.
		return nil
	}
	if size < 1 {
		return fmt.Errorf("block size (%d) too small %d", size, br.Len())
	}

	typ, err := br.ReadByte()
	if err != nil {
		return err
	}
	size--
	compressed := br.Next(int(size))
	if len(compressed) != int(size) {
		return errors.New("short block section")
	}
	switch typ {
	case blockTypeUncompressed:
		// Uncompressed; copy through.
		if len(compressed) != len(dst) {
			return fmt.Errorf("short uncompressed block: in (%d) != out (%d)", len(compressed), len(dst))
		}
		copy(dst, compressed)
	case blockTypeS2:
		wg.Add(1)
		go func() {
			defer wg.Done()
			buf := bytes.NewBuffer(compressed)
			dec := s2Readers.Get().(*s2.Reader)
			dec.Reset(buf)
			_, err := io.ReadFull(dec, dst)
			dec.Reset(nil)
			s2Readers.Put(dec)
			*dstErr = err
		}()
	case blockTypeZstd:
		wg.Add(1)
		go func() {
			defer wg.Done()
			want := len(dst)
			dst, err := zDec.DecodeAll(compressed, dst[:0])
			if err == nil && want != len(dst) {
				err = errors.New("zstd decompressed size mismatch")
			}
			*dstErr = err
		}()
	default:
		return fmt.Errorf("unknown compression type: %d", typ)
	}
	return nil
}

const (
	blockTypeUncompressed byte = 0
	blockTypeS2           byte = 1
	blockTypeZstd         byte = 2
)

var zDec *zstd.Decoder

var zEncFast = sync.Pool{New: func() interface{} {
	e, _ := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedDefault), zstd.WithEncoderCRC(false))
	return e
}}

var s2FastWriters = sync.Pool{New: func() interface{} {
	return s2.NewWriter(nil)
}}

var s2Writers = sync.Pool{New: func() interface{} {
	return s2.NewWriter(nil, s2.WriterBetterCompression())
}}
var s2Readers = sync.Pool{New: func() interface{} {
	return s2.NewReader(nil)
}}

var initSerializerOnce sync.Once

func initSerializer() {
	zDec, _ = zstd.NewReader(nil)
}

type encodedResult func() ([]byte, error)

// encBlock returns a writer that encodes a block of data, plus a function
// that flushes the encoder and returns the framed result.
func encBlock(mode byte, buf []byte, fast bool) (io.Writer, encodedResult) {
	dst := bytes.NewBuffer(buf[:0])
	dst.WriteByte(mode)
	switch mode {
	case blockTypeUncompressed:
		return dst, func() ([]byte, error) {
			return dst.Bytes(), nil
		}
	case blockTypeS2:
		var enc *s2.Writer
		var put *sync.Pool
		if fast {
			enc = s2FastWriters.Get().(*s2.Writer)
			put = &s2FastWriters
		} else {
			enc = s2Writers.Get().(*s2.Writer)
			put = &s2Writers
		}
		enc.Reset(dst)
		return enc, func() (i []byte, err error) {
			err = enc.Close()
			if err != nil {
				return nil, err
			}
			enc.Reset(nil)
			put.Put(enc)
			return dst.Bytes(), nil
		}
	case blockTypeZstd:
		enc := zEncFast.Get().(*zstd.Encoder)
		enc.Reset(dst)
		return enc, func() (i []byte, err error) {
			err = enc.Close()
			if err != nil {
				return nil, err
			}
			enc.Reset(nil)
			zEncFast.Put(enc)
			return dst.Bytes(), nil
		}
	}
	panic("unknown compression mode")
}
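
// exampleEncBlock is a sketch, not part of the original file, of the
// encBlock contract: write raw bytes to the returned writer, then invoke
// the returned closure to flush and obtain the framed block (type byte
// followed by the compressed payload).
func exampleEncBlock(data []byte) ([]byte, error) {
	w, done := encBlock(blockTypeS2, nil, false)
	if _, err := w.Write(data); err != nil {
		return nil, err
	}
	return done()
}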

// indexString deduplicates strings, populating the lookup table and the
// deduplicated string buffer, and returns the offset of sb within that buffer.
func (s *Serializer) indexString(sb []byte) (offset uint64) {
	// Strings this long are only possible on 64 bit platforms,
	// so this will never trigger on 32 bit platforms.
	if uint64(len(sb)) >= math.MaxUint32 {
		panic("string too long")
	}

	h := memHash(sb) & stringmask
	off := int(s.stringsTable[h]) - 1
	end := off + len(sb)
	if off >= 0 && end <= len(s.stringBuf) {
		found := s.stringBuf[off:end]
		if bytes.Equal(found, sb) {
			return uint64(off)
		}
		// It didn't match :(
	}
	off = len(s.stringBuf)
	s.stringBuf = append(s.stringBuf, sb...)
	s.stringsTable[h] = uint32(off + 1)
	s.stringWr.Write(sb)
	return uint64(off)
}

//go:noescape
//go:linkname memhash runtime.memhash
func memhash(p unsafe.Pointer, h, s uintptr) uintptr

// memHash is the hash function used by the Go map implementation. It uses
// available hardware instructions (it behaves as aeshash if AES instructions
// are available).
// NOTE: The hash seed changes for every process, so this cannot be used as a
// persistent hash.
func memHash(data []byte) uint64 {
	ss := (*stringStruct)(unsafe.Pointer(&data))
	return uint64(memhash(ss.str, 0, uintptr(ss.len)))
}

type stringStruct struct {
	str unsafe.Pointer
	len int
}