github.com/dgraph-io/simdjson-go@v0.3.0/parsed_serialize.go

/*
 * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package simdjson

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math"
	"runtime"
	"sync"
	"unsafe"

	"github.com/klauspost/compress/s2"
	"github.com/klauspost/compress/zstd"
)

const (
	stringBits        = 14
	stringSize        = 1 << stringBits
	stringmask        = stringSize - 1
	serializedVersion = 2
)
// Serializer allows serializing parsed JSON and reading it back.
// A Serializer can be reused, but not used concurrently.
type Serializer struct {
	// Compressed strings
	sMsg []byte

	// Uncompressed tags
	tagsBuf []byte
	// Values
	valuesBuf     []byte
	valuesCompBuf []byte
	tagsCompBuf   []byte

	compValues, compTags uint8
	compStrings          uint8
	fasterComp           bool

	// Deduplicated strings
	stringWr     io.Writer
	stringsTable [stringSize]uint32
	stringBuf    []byte

	maxBlockSize uint64
}

// NewSerializer will create and initialize a Serializer.
func NewSerializer() *Serializer {
	initSerializerOnce.Do(initSerializer)
	var s Serializer
	s.CompressMode(CompressDefault)
	s.maxBlockSize = 1 << 31
	return &s
}

type CompressMode uint8

const (
	// CompressNone no compression whatsoever.
	CompressNone CompressMode = iota

	// CompressFast will apply light compression,
	// but will not deduplicate strings which may affect deserialization speed.
	CompressFast

	// CompressDefault applies light compression and deduplicates strings.
	CompressDefault
	// CompressBest will apply zstd compression for the smallest output,
	// at a cost in speed.
	CompressBest
)

// CompressMode sets the compression mode of the Serializer.
func (s *Serializer) CompressMode(c CompressMode) {
	switch c {
	case CompressNone:
		s.compValues = blockTypeUncompressed
		s.compTags = blockTypeUncompressed
		s.compStrings = blockTypeUncompressed
	case CompressFast:
		s.compValues = blockTypeS2
		s.compTags = blockTypeS2
		s.compStrings = blockTypeS2
		s.fasterComp = true
	case CompressDefault:
		s.compValues = blockTypeS2
		s.compTags = blockTypeS2
		s.compStrings = blockTypeS2
	case CompressBest:
		s.compValues = blockTypeZstd
		s.compTags = blockTypeZstd
		s.compStrings = blockTypeZstd
	default:
		panic("unknown compression mode")
	}
}
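
// compressModeExample is an illustrative sketch, not part of the original
// API: it shows picking a mode once and then reusing the Serializer.
// The pj argument is assumed to come from Parse elsewhere in this package.
func compressModeExample(pj *ParsedJson) []byte {
	s := NewSerializer()
	s.CompressMode(CompressBest) // zstd: smallest output, slowest.
	return s.Serialize(nil, *pj)
}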

func serializeNDStream(dst io.Writer, in <-chan Stream, reuse chan<- *ParsedJson, concurrency int, comp CompressMode) error {
	if concurrency <= 0 {
		concurrency = (runtime.GOMAXPROCS(0) + 1) / 2
	}
	var wg sync.WaitGroup
	wg.Add(concurrency)
	type workload struct {
		pj  *ParsedJson
		dst chan []byte
	}
	var readCh = make(chan workload, concurrency)
	var writeCh = make(chan chan []byte, concurrency)
	dstPool := sync.Pool{
		New: func() interface{} {
			return make([]byte, 0, 64<<10)
		},
	}
	for i := 0; i < concurrency; i++ {
		go func() {
			s := NewSerializer()
			s.CompressMode(comp)
			defer wg.Done()
			for input := range readCh {
				res := s.Serialize(dstPool.Get().([]byte)[:0], *input.pj)
				input.dst <- res
				select {
				case reuse <- input.pj:
				default:
				}
			}
		}()
	}
	var writeErr error
	var wwg sync.WaitGroup
	wwg.Add(1)
	go func() {
		defer wwg.Done()
		for block := range writeCh {
			b := <-block
			if writeErr == nil {
				var n int
				n, writeErr = dst.Write(b)
				if writeErr == nil && n != len(b) {
					writeErr = io.ErrShortWrite
				}
			}
			// Recycle the output buffer for the serializer workers.
			dstPool.Put(b)
		}
	}()
	var readErr error
	var rwg sync.WaitGroup
	rwg.Add(1)
	go func() {
		defer rwg.Done()
		defer close(readCh)
		for block := range in {
			if block.Error != nil {
				readErr = block.Error
				break
			}
			wl := workload{
				pj:  block.Value,
				dst: make(chan []byte, 1),
			}
			// Register the result channel with the writer first so output
			// preserves input order, then hand the workload to a serializer.
			writeCh <- wl.dst
			readCh <- wl
		}
	}()
	rwg.Wait()
	if readErr != nil {
		wg.Wait()
		close(writeCh)
		wwg.Wait()
		return readErr
	}
	// Read done, wait for workers...
	wg.Wait()
	close(writeCh)
	// Wait for writer...
	wwg.Wait()
	return writeErr
}
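
// serializeNDExample is an illustrative sketch (a hypothetical helper, not
// part of the original source): it feeds serializeNDStream from
// ParseNDStream, assuming that function's signature in this package, and
// recycles ParsedJson values through the reuse channel.
func serializeNDExample(w io.Writer, r io.Reader) error {
	in := make(chan Stream, 4)
	reuse := make(chan *ParsedJson, 4)
	go ParseNDStream(r, in, reuse)
	// Passing 0 lets serializeNDStream pick about half of GOMAXPROCS.
	return serializeNDStream(w, in, reuse, 0, CompressDefault)
}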

const (
	tagFloatWithFlag = Tag('e')
)

// Serialize the data in pj and return the data.
// An optional destination can be provided.
func (s *Serializer) Serialize(dst []byte, pj ParsedJson) []byte {
	// Blocks:
	//  - Compressed size of entire block following. Can be 0 if empty. (varuint)
	//  - Block type, byte:
	//     0: uncompressed, rest is data.
	//     1: S2 compressed stream.
	//     2: Zstd block.
	//  - Compressed data.
	//
	// Serialized format:
	// - Header: Version (byte)
	// - Compressed size of remaining data (varuint). Excludes previous and size of this.
	// - Tape size, uncompressed (varuint)
	// - Strings size, uncompressed (varuint)
	// - Strings Block: Compressed block. See above.
	// - Message size, uncompressed (varuint)
	// - Message Block: Compressed block. See above.
	// - Uncompressed size of tags (varuint)
	// - Tags Block: Compressed block. See above.
	// - Uncompressed values size (varuint)
	// - Values Block: Compressed block. See above.
	//
	// Reconstruction:
	//
	// Read next tag. Depending on the tag, read a number of values:
	// Values:
	//   - Null, BoolTrue/BoolFalse: No value.
	//   - TagObjectStart, TagArrayStart, TagRoot: (Offset - Current offset). Write end tag for object and array.
	//   - TagObjectEnd, TagArrayEnd: No value stored, derived from start.
	//   - TagInteger, TagUint, TagFloat: 64 bits.
	//   - TagString: offset and length stored.
	//   - tagFloatWithFlag (v2): Contains float parsing flag.
	//
	// If any tags or values are left over after reconstruction, the stream is considered invalid.
	var wg sync.WaitGroup

	// Reset lookup table.
	// Offsets are offset by 1, so 0 indicates an unfilled entry.
	for i := range s.stringsTable[:] {
		s.stringsTable[i] = 0
	}
	if len(s.stringBuf) > 0 {
		s.stringBuf = s.stringBuf[:0]
	}
	if len(s.sMsg) > 0 {
		s.sMsg = s.sMsg[:0]
	}

	msgWr, msgDone := encBlock(s.compStrings, s.sMsg, s.fasterComp)
	s.stringWr = msgWr

	const tagBufSize = 64 << 10
	const valBufSize = 64 << 10

	valWr, valDone := encBlock(s.compValues, s.valuesCompBuf, s.fasterComp)
	tagWr, tagDone := encBlock(s.compTags, s.tagsCompBuf, s.fasterComp)
	// Pessimistically allocate for maximum possible size.
	if cap(s.tagsBuf) <= tagBufSize {
		s.tagsBuf = make([]byte, tagBufSize)
	}
	s.tagsBuf = s.tagsBuf[:tagBufSize]

	// At most one value per 2 tape entries
	if cap(s.valuesBuf) < valBufSize+4 {
		s.valuesBuf = make([]byte, valBufSize+4)
	}

	s.valuesBuf = s.valuesBuf[:0]
	off := 0
	tagsOff := 0
	var tmp [8]byte
	rawValues := 0
	rawTags := 0
	for off < len(pj.Tape) {
		if tagsOff >= tagBufSize {
			rawTags += tagsOff
			tagWr.Write(s.tagsBuf[:tagsOff])
			tagsOff = 0
		}
		if len(s.valuesBuf) >= valBufSize {
			rawValues += len(s.valuesBuf)
			valWr.Write(s.valuesBuf)
			s.valuesBuf = s.valuesBuf[:0]
		}
		entry := pj.Tape[off]
		ntype := Tag(entry >> 56)
		payload := entry & JSONVALUEMASK

		switch ntype {
		case TagString:
			sb, err := pj.stringByteAt(payload, pj.Tape[off+1])
			if err != nil {
				panic(err)
			}
			offset := s.indexString(sb)

			binary.LittleEndian.PutUint64(tmp[:], offset)
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			binary.LittleEndian.PutUint64(tmp[:], uint64(len(sb)))
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagUint:
			binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagInteger:
			binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagFloat:
			if payload == 0 {
				binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				off++
			} else {
				ntype = tagFloatWithFlag
				binary.LittleEndian.PutUint64(tmp[:], entry)
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				off++
			}
		case TagNull, TagBoolTrue, TagBoolFalse:
			// No value.
		case TagObjectStart, TagArrayStart, TagRoot:
			// TagObjectStart and TagArrayStart always point forward.
			// TagRoot can point in either direction, so we rely on under/overflow.
			binary.LittleEndian.PutUint64(tmp[:], payload-uint64(off))
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
		case TagObjectEnd, TagArrayEnd, TagEnd:
			// The value is deduced from the start tag, or there is no value.
		default:
			wg.Wait()
			panic(fmt.Errorf("unknown tag: %d", int(ntype)))
		}
		s.tagsBuf[tagsOff] = uint8(ntype)
		tagsOff++
		off++
	}
	if tagsOff > 0 {
		rawTags += tagsOff
		tagWr.Write(s.tagsBuf[:tagsOff])
	}
	if len(s.valuesBuf) > 0 {
		rawValues += len(s.valuesBuf)
		valWr.Write(s.valuesBuf)
	}
	wg.Add(3)
	go func() {
		var err error
		s.tagsCompBuf, err = tagDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()
	go func() {
		var err error
		s.valuesCompBuf, err = valDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()
	go func() {
		var err error
		s.sMsg, err = msgDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()

	// Wait for compressors
	wg.Wait()

	// Version
	dst = append(dst, serializedVersion)

	// Size of varints...
	varInts := binary.PutUvarint(tmp[:], uint64(0)) +
		binary.PutUvarint(tmp[:], uint64(len(s.sMsg))) +
		binary.PutUvarint(tmp[:], uint64(rawTags)) +
		binary.PutUvarint(tmp[:], uint64(len(s.tagsCompBuf))) +
		binary.PutUvarint(tmp[:], uint64(rawValues)) +
		binary.PutUvarint(tmp[:], uint64(len(s.valuesCompBuf))) +
		binary.PutUvarint(tmp[:], uint64(len(s.stringBuf))) +
		binary.PutUvarint(tmp[:], uint64(len(pj.Tape)))

	// Total size of the remaining data. The extra 1 accounts for the second
	// zero varint of the empty strings section written below.
	n := binary.PutUvarint(tmp[:], uint64(1+len(s.sMsg)+len(s.tagsCompBuf)+len(s.valuesCompBuf)+varInts))
	dst = append(dst, tmp[:n]...)

	// Tape elements, uncompressed.
	n = binary.PutUvarint(tmp[:], uint64(len(pj.Tape)))
	dst = append(dst, tmp[:n]...)

	// Strings uncompressed size (always 0; v2 stores all string data in the message section).
	dst = append(dst, 0)
	// Strings block (empty).
	dst = append(dst, 0)

	// Messages uncompressed size
	n = binary.PutUvarint(tmp[:], uint64(len(s.stringBuf)))
	dst = append(dst, tmp[:n]...)
	// Message
	n = binary.PutUvarint(tmp[:], uint64(len(s.sMsg)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.sMsg...)

	// Tags
	n = binary.PutUvarint(tmp[:], uint64(rawTags))
	dst = append(dst, tmp[:n]...)
	n = binary.PutUvarint(tmp[:], uint64(len(s.tagsCompBuf)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.tagsCompBuf...)

	// Values
	n = binary.PutUvarint(tmp[:], uint64(rawValues))
	dst = append(dst, tmp[:n]...)
	n = binary.PutUvarint(tmp[:], uint64(len(s.valuesCompBuf)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.valuesCompBuf...)
	if false {
		fmt.Println("strings:", len(pj.Strings)+len(pj.Message), "->", len(s.sMsg), "tags:", rawTags, "->", len(s.tagsCompBuf), "values:", rawValues, "->", len(s.valuesCompBuf), "Total:", len(pj.Message)+len(pj.Strings)+len(pj.Tape)*8, "->", len(dst))
	}

	return dst
}
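
// roundTripExample is an illustrative sketch, not part of the original API:
// Serialize appends to dst, so recycling the buffer between calls avoids
// allocations, and Deserialize likewise reuses a destination ParsedJson.
func roundTripExample(s *Serializer, pj *ParsedJson, buf []byte, dst *ParsedJson) ([]byte, *ParsedJson, error) {
	buf = s.Serialize(buf[:0], *pj)
	dst, err := s.Deserialize(buf, dst)
	return buf, dst, err
}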

func (s *Serializer) splitBlocks(r io.Reader, out chan []byte) error {
	br := bufio.NewReader(r)
	defer close(out)
	for {
		if v, err := br.ReadByte(); err != nil {
			return err
		} else if v == 0 || v > serializedVersion {
			return errors.New("unknown version")
		}

		// Comp size
		c, err := binary.ReadUvarint(br)
		if err != nil {
			return err
		}
		if c > s.maxBlockSize {
			return errors.New("compressed block too big")
		}
		block := make([]byte, c)
		n, err := io.ReadFull(br, block)
		if err != nil {
			return err
		}
		if n > 0 {
			out <- block
		}
	}
}
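
// Note: splitBlocks consumes back-to-back documents as produced by Serialize
// (version byte, size varuint, payload) and returns io.EOF once the stream is
// cleanly exhausted, so callers can treat io.EOF as a normal end of input.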

// Deserialize the content in src.
// Only basic sanity checks will be performed.
// Slight corruption will likely go through unnoticed.
// An optional destination can be provided.
func (s *Serializer) Deserialize(src []byte, dst *ParsedJson) (*ParsedJson, error) {
	br := bytes.NewBuffer(src)

	if v, err := br.ReadByte(); err != nil {
		return dst, err
	} else if v > serializedVersion {
		// v2 reads v1.
		return dst, errors.New("unknown version")
	}

	if dst == nil {
		dst = &ParsedJson{}
	}

	// Comp size
	if c, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if int(c) > br.Len() {
			return dst, fmt.Errorf("stream too short, want %d, only have %d left", c, br.Len())
		}
	}

	// Tape size
	if ts, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(dst.Tape)) < ts {
			dst.Tape = make([]uint64, ts)
		}
		dst.Tape = dst.Tape[:ts]
	}

	// String size
	if ss, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(dst.Strings)) < ss || dst.Strings == nil {
			dst.Strings = make([]byte, ss)
		}
		dst.Strings = dst.Strings[:ss]
	}

	// Decompress strings
	var sWG sync.WaitGroup
	var stringsErr, msgErr error
	err := s.decBlock(br, dst.Strings, &sWG, &stringsErr)
	if err != nil {
		return dst, err
	}

	// Message size
	if ss, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(dst.Message)) < ss || dst.Message == nil {
			dst.Message = make([]byte, ss)
		}
		dst.Message = dst.Message[:ss]
	}

	// Messages
	err = s.decBlock(br, dst.Message, &sWG, &msgErr)
	if err != nil {
		return dst, err
	}
	defer sWG.Wait()

	// Decompress tags
	if tags, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(s.tagsBuf)) < tags {
			s.tagsBuf = make([]byte, tags)
		}
		s.tagsBuf = s.tagsBuf[:tags]
	}

	var wg sync.WaitGroup
	var tagsErr error
	err = s.decBlock(br, s.tagsBuf, &wg, &tagsErr)
	if err != nil {
		return dst, fmt.Errorf("decompressing tags: %w", err)
	}
	defer wg.Wait()

	// Decompress values
	if vals, err := binary.ReadUvarint(br); err != nil {
		return dst, err
	} else {
		if uint64(cap(s.valuesBuf)) < vals {
			s.valuesBuf = make([]byte, vals)
		}
		s.valuesBuf = s.valuesBuf[:vals]
	}

	var valsErr error
	err = s.decBlock(br, s.valuesBuf, &wg, &valsErr)
	if err != nil {
		return dst, fmt.Errorf("decompressing values: %w", err)
	}

	// Wait until we have what we need for the tape.
	wg.Wait()
	switch {
	case tagsErr != nil:
		return dst, fmt.Errorf("decompressing tags: %w", tagsErr)
	case valsErr != nil:
		return dst, fmt.Errorf("decompressing values: %w", valsErr)
	}

	// Reconstruct tape:
	var off int
	values := s.valuesBuf
	for _, t := range s.tagsBuf {
		if off == len(dst.Tape) {
			return dst, errors.New("tags extended beyond tape")
		}
		tag := Tag(t)

		tagDst := uint64(t) << 56
		switch tag {
		case TagString:
			if len(values) < 16 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			sOffset := binary.LittleEndian.Uint64(values[:8])
			sLen := binary.LittleEndian.Uint64(values[8:16])
			values = values[16:]

			dst.Tape[off] = tagDst | sOffset
			dst.Tape[off+1] = sLen
			off += 2
		case TagFloat, TagInteger, TagUint:
			if len(values) < 8 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			dst.Tape[off] = tagDst
			dst.Tape[off+1] = binary.LittleEndian.Uint64(values[:8])
			values = values[8:]
			off += 2
		case tagFloatWithFlag:
			// Tape contains full value
			if len(values) < 16 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			dst.Tape[off] = binary.LittleEndian.Uint64(values[:8])
			dst.Tape[off+1] = binary.LittleEndian.Uint64(values[8:16])
			values = values[16:]
			off += 2
		case TagNull, TagBoolTrue, TagBoolFalse, TagEnd:
			dst.Tape[off] = tagDst
			off++
		case TagObjectStart, TagArrayStart:
			if len(values) < 8 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			// Always forward
			val := binary.LittleEndian.Uint64(values[:8])
			values = values[8:]
			val += uint64(off)
			if val > uint64(len(dst.Tape)) {
				return dst, fmt.Errorf("%v extends beyond tape (%d). offset:%d", tag, len(dst.Tape), val)
			}

			dst.Tape[off] = tagDst | val
			// Write closing...
			dst.Tape[val-1] = uint64(tagOpenToClose[tag])<<56 | uint64(off)

			off++
		case TagRoot:
			if len(values) < 8 {
				return dst, fmt.Errorf("reading %v: no values left", tag)
			}
			// Always forward
			val := binary.LittleEndian.Uint64(values[:8])
			values = values[8:]
			val += uint64(off)
			if val > uint64(len(dst.Tape)) {
				return dst, fmt.Errorf("%v extends beyond tape (%d). offset:%d", tag, len(dst.Tape), val)
			}

			dst.Tape[off] = tagDst | val

			off++
		case TagObjectEnd, TagArrayEnd:
			// This should already have been written.
			if dst.Tape[off]&JSONTAGMASK != tagDst {
				return dst, fmt.Errorf("reading %v, offset:%d, start tag did not match %x != %x", tag, off, dst.Tape[off]>>56, uint8(tag))
			}
			off++
		default:
			return dst, fmt.Errorf("unknown tag: %v", tag)
		}
	}
	sWG.Wait()
	if off != len(dst.Tape) {
		return dst, fmt.Errorf("tags did not fill tape, want %d, got %d", len(dst.Tape), off)
	}
	if len(values) > 0 {
		return dst, fmt.Errorf("%d extra value bytes left after tape reconstruction", len(values))
	}
	if stringsErr != nil {
		return dst, fmt.Errorf("reading strings: %w", stringsErr)
	}
	if msgErr != nil {
		return dst, fmt.Errorf("reading message: %w", msgErr)
	}
	return dst, nil
}
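
// Note: Deserialize recycles dst's Tape, Strings and Message buffers whenever
// their capacity suffices, so reusing a single ParsedJson across many calls
// amortizes allocations.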

func (s *Serializer) decBlock(br *bytes.Buffer, dst []byte, wg *sync.WaitGroup, dstErr *error) error {
	size, err := binary.ReadUvarint(br)
	if err != nil {
		return err
	}
	if size > uint64(br.Len()) {
		return fmt.Errorf("block size (%d) extends beyond input %d", size, br.Len())
	}
	if size == 0 && len(dst) == 0 {
		// Nothing, no compress type
		return nil
	}
	if size < 1 {
		return fmt.Errorf("block size is 0, but %d bytes of output were expected", len(dst))
	}

	typ, err := br.ReadByte()
	if err != nil {
		return err
	}
	size--
	compressed := br.Next(int(size))
	if len(compressed) != int(size) {
		return errors.New("short block section")
	}
	switch typ {
	case blockTypeUncompressed:
		// uncompressed
		if len(compressed) != len(dst) {
			return fmt.Errorf("short uncompressed block: in (%d) != out (%d)", len(compressed), len(dst))
		}
		copy(dst, compressed)
	case blockTypeS2:
		wg.Add(1)
		go func() {
			defer wg.Done()
			buf := bytes.NewBuffer(compressed)
			dec := s2Readers.Get().(*s2.Reader)
			dec.Reset(buf)
			_, err := io.ReadFull(dec, dst)
			dec.Reset(nil)
			s2Readers.Put(dec)
			*dstErr = err
		}()
	case blockTypeZstd:
		wg.Add(1)
		go func() {
			defer wg.Done()
			want := len(dst)
			// Decodes in place into dst's backing array; locals keep the
			// goroutine from writing to the enclosing function's variables.
			dec, err := zDec.DecodeAll(compressed, dst[:0])
			if err == nil && want != len(dec) {
				err = errors.New("zstd decompressed size mismatch")
			}
			*dstErr = err
		}()
	default:
		return fmt.Errorf("unknown compression type: %d", typ)
	}
	return nil
}

const (
	blockTypeUncompressed byte = 0
	blockTypeS2           byte = 1
	blockTypeZstd         byte = 2
)
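
// Illustrative wire framing of one block as produced by encBlock and sized by
// Serialize: uvarint(1+len(payload)), the type byte above, then the payload.
// For example, an uncompressed 3-byte payload "abc" is framed as:
//
//	0x04, blockTypeUncompressed, 'a', 'b', 'c'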

var zDec *zstd.Decoder

var zEncFast = sync.Pool{New: func() interface{} {
	e, _ := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest), zstd.WithEncoderCRC(false))
	return e
}}

var s2FastWriters = sync.Pool{New: func() interface{} {
	return s2.NewWriter(nil)
}}

var s2Writers = sync.Pool{New: func() interface{} {
	return s2.NewWriter(nil, s2.WriterBetterCompression())
}}
var s2Readers = sync.Pool{New: func() interface{} {
	return s2.NewReader(nil)
}}

var initSerializerOnce sync.Once

func initSerializer() {
	zDec, _ = zstd.NewReader(nil)
}

type encodedResult func() ([]byte, error)

// encBlock will encode a block of data.
func encBlock(mode byte, buf []byte, fast bool) (io.Writer, encodedResult) {
	dst := bytes.NewBuffer(buf[:0])
	dst.WriteByte(mode)
	switch mode {
	case blockTypeUncompressed:
		return dst, func() ([]byte, error) {
			return dst.Bytes(), nil
		}
	case blockTypeS2:
		var enc *s2.Writer
		var put *sync.Pool
		if fast {
			enc = s2FastWriters.Get().(*s2.Writer)
			put = &s2FastWriters
		} else {
			enc = s2Writers.Get().(*s2.Writer)
			put = &s2Writers
		}
		enc.Reset(dst)
		return enc, func() (i []byte, err error) {
			err = enc.Close()
			if err != nil {
				return nil, err
			}
			enc.Reset(nil)
			put.Put(enc)
			return dst.Bytes(), nil
		}
	case blockTypeZstd:
		enc := zEncFast.Get().(*zstd.Encoder)
		enc.Reset(dst)
		return enc, func() (i []byte, err error) {
			err = enc.Close()
			if err != nil {
				return nil, err
			}
			enc.Reset(nil)
			zEncFast.Put(enc)
			return dst.Bytes(), nil
		}
	}
	panic("unknown compression mode")
}
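
// encBlockExample is an illustrative sketch, not part of the original source:
// write the payload through the returned writer, then call the returned
// function exactly once to flush and obtain the block (the type byte followed
// by the, possibly compressed, payload).
func encBlockExample(payload []byte) ([]byte, error) {
	w, done := encBlock(blockTypeS2, nil, false)
	if _, err := w.Write(payload); err != nil {
		return nil, err
	}
	return done()
}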

// indexString will deduplicate strings and populate the deduplicated string
// buffer, returning the string's offset within it.
func (s *Serializer) indexString(sb []byte) (offset uint64) {
	// Only possible on 64 bit platforms, so it will never trigger on 32 bit platforms.
	if uint64(len(sb)) >= math.MaxUint32 {
		panic("string too long")
	}

	h := memHash(sb) & stringmask
	off := int(s.stringsTable[h]) - 1
	end := off + len(sb)
	if off >= 0 && end <= len(s.stringBuf) {
		found := s.stringBuf[off:end]
		if bytes.Equal(found, sb) {
			return uint64(off)
		}
		// It didn't match :(
	}
	off = len(s.stringBuf)
	s.stringBuf = append(s.stringBuf, sb...)
	s.stringsTable[h] = uint32(off + 1)
	s.stringWr.Write(sb)
	return uint64(off)
}
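
// Note: stringsTable stores offset+1 so the zero value marks an empty slot;
// e.g. after indexing "abc" at offset 0 the table entry becomes 1, and
// indexing "abc" again returns offset 0 without growing stringBuf.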

//go:noescape
//go:linkname memhash runtime.memhash
func memhash(p unsafe.Pointer, h, s uintptr) uintptr

// memHash is the hash function used by go map, it utilizes available hardware instructions (behaves
// as aeshash if aes instruction is available).
// NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash.
func memHash(data []byte) uint64 {
	ss := (*stringStruct)(unsafe.Pointer(&data))
	return uint64(memhash(ss.str, 0, uintptr(ss.len)))
}

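// stringStruct mirrors the runtime's string header (data pointer plus length),
// letting memHash reinterpret a []byte header without copying.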
type stringStruct struct {
	str unsafe.Pointer
	len int
}