/*
 * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package simdjson

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math"
	"runtime"
	"sync"
	"unsafe"

	"github.com/klauspost/compress/s2"
	"github.com/klauspost/compress/zstd"
)

const (
	// stringBits is the log2 of the string-deduplication hash table size.
	stringBits = 14
	stringSize = 1 << stringBits
	stringmask = stringSize - 1
	// serializedVersion is the stream format version written by Serialize.
	// Version 2 added the tagFloatWithFlag encoding; Deserialize also reads v1.
	serializedVersion = 2
)

// Serializer allows to serialize parsed json and read it back.
// A Serializer can be reused, but not used concurrently.
type Serializer struct {
	// Compressed strings
	sMsg []byte

	// Uncompressed tags
	tagsBuf []byte
	// Values
	valuesBuf     []byte
	valuesCompBuf []byte
	tagsCompBuf   []byte

	// Per-section block compression types (one of the blockType* constants).
	compValues, compTags uint8
	compStrings          uint8
	// fasterComp selects the faster S2 writer pool when true.
	fasterComp bool

	// Deduplicated strings
	// stringWr is the destination for newly-seen strings during Serialize.
	stringWr io.Writer
	// stringsTable maps memHash(string)&stringmask -> offset+1 into stringBuf.
	// 0 means "unfilled entry".
	stringsTable [stringSize]uint32
	stringBuf    []byte

	// maxBlockSize limits the compressed block size accepted by splitBlocks.
	maxBlockSize uint64
}

// NewSerializer will create and initialize a Serializer.
func NewSerializer() *Serializer {
	initSerializerOnce.Do(initSerializer)
	var s Serializer
	s.CompressMode(CompressDefault)
	s.maxBlockSize = 1 << 31
	return &s
}

// CompressMode selects the compression applied to serialized output.
type CompressMode uint8

const (
	// CompressNone no compression whatsoever.
	CompressNone CompressMode = iota

	// CompressFast will apply light compression,
	// but will not deduplicate strings which may affect deserialization speed.
	CompressFast

	// CompressDefault applies light compression and deduplicates strings.
	CompressDefault

	// CompressBest applies the strongest (zstd) compression to all sections.
	CompressBest
)

// CompressMode configures the per-section compression used by Serialize.
// It panics if c is not one of the defined CompressMode constants.
func (s *Serializer) CompressMode(c CompressMode) {
	switch c {
	case CompressNone:
		s.compValues = blockTypeUncompressed
		s.compTags = blockTypeUncompressed
		s.compStrings = blockTypeUncompressed
	case CompressFast:
		s.compValues = blockTypeS2
		s.compTags = blockTypeS2
		s.compStrings = blockTypeS2
		s.fasterComp = true
	case CompressDefault:
		s.compValues = blockTypeS2
		s.compTags = blockTypeS2
		s.compStrings = blockTypeS2
	case CompressBest:
		s.compValues = blockTypeZstd
		s.compTags = blockTypeZstd
		s.compStrings = blockTypeZstd
	default:
		panic("unknown compression mode")
	}
}

// serializeNDStream serializes a stream of parsed JSON values to dst using up
// to concurrency workers, each with its own Serializer configured with comp.
// Consumed *ParsedJson values are offered back on reuse (non-blocking).
// The first read error or write error encountered is returned.
//
// NOTE(review): workers send results on each workload's dst channel, but those
// channels are never forwarded to writeCh, so the writer goroutine appears to
// receive nothing — confirm against callers before relying on this function
// (it is unexported and may be unused).
func serializeNDStream(dst io.Writer, in <-chan Stream, reuse chan<- *ParsedJson, concurrency int, comp CompressMode) error {
	if concurrency <= 0 {
		// Default to about half the available CPUs.
		concurrency = (runtime.GOMAXPROCS(0) + 1) / 2
	}
	var wg sync.WaitGroup
	wg.Add(concurrency)
	type workload struct {
		pj  *ParsedJson
		dst chan []byte
	}
	var readCh = make(chan workload, concurrency)
	var writeCh = make(chan chan []byte, concurrency)
	dstPool := sync.Pool{
		New: func() interface{} {
			return make([]byte, 0, 64<<10)
		},
	}
	// Serialization workers.
	for i := 0; i < concurrency; i++ {
		go func() {
			s := NewSerializer()
			s.CompressMode(comp)
			defer wg.Done()
			for input := range readCh {
				res := s.Serialize(dstPool.Get().([]byte)[:0], *input.pj)
				input.dst <- res
				// Offer the parsed value back for reuse; drop if nobody wants it.
				select {
				case reuse <- input.pj:
				default:
				}
			}
		}()
	}
	// Writer goroutine: drains result channels in submission order.
	var writeErr error
	var wwg sync.WaitGroup
	wwg.Add(1)
	go func() {
		defer wwg.Done()
		for block := range writeCh {
			b := <-block
			var n int
			n, writeErr = dst.Write(b)
			if n != len(b) {
				writeErr = io.ErrShortWrite
			}
		}
	}()
	// Reader goroutine: feeds workers from the input stream.
	var readErr error
	var rwg sync.WaitGroup
	rwg.Add(1)
	go func() {
		defer rwg.Done()
		defer close(readCh)
		for block := range in {
			if block.Error != nil {
				readErr = block.Error
			}
			readCh <- workload{
				pj:  block.Value,
				dst: make(chan []byte, 0),
			}
		}
	}()
	rwg.Wait()
	if readErr != nil {
		// Drain workers and writer before reporting the read error.
		wg.Wait()
		close(writeCh)
		wwg.Wait()
		return readErr
	}
	// Read done, wait for workers...
	wg.Wait()
	close(writeCh)
	// Wait for writer...
	wwg.Wait()
	return writeErr
}

const (
	// tagFloatWithFlag (v2) marks a float tape entry whose payload carries
	// float parsing flags, so the full tape entry must be stored.
	tagFloatWithFlag = Tag('e')
)

// Serialize the data in pj and return the data.
// An optional destination can be provided.
func (s *Serializer) Serialize(dst []byte, pj ParsedJson) []byte {
	// Blocks:
	// - Compressed size of entire block following. Can be 0 if empty. (varuint)
	// - Block type, byte:
	//   0: uncompressed, rest is data.
	//   1: S2 compressed stream.
	//   2: Zstd block.
	// - Compressed data.
	//
	// Serialized format:
	// - Header: Version (byte)
	// - Compressed size of remaining data (varuint). Excludes previous and size of this.
	// - Tape size, uncompressed (varuint)
	// - Strings size, uncompressed (varuint)
	// - Strings Block: Compressed block. See above.
	// - Message size, uncompressed (varuint)
	// - Message Block: Compressed block. See above.
	// - Uncompressed size of tags (varuint)
	// - Tags Block: Compressed block. See above.
	// - Uncompressed values size (varuint)
	// - Values Block: Compressed block. See above.
	//
	// Reconstruction:
	//
	// Read next tag. Depending on the tag, read a number of values:
	// Values:
	// - Null, BoolTrue/BoolFalse: No value.
	// - TagObjectStart, TagArrayStart, TagRoot: (Offset - Current offset).
	//   Write end tag for object and array.
	// - TagObjectEnd, TagArrayEnd: No value stored, derived from start.
	// - TagInteger, TagUint, TagFloat: 64 bits
	// - TagString: offset, length stored.
	// - tagFloatWithFlag (v2): Contains float parsing flag.
	//
	// If there are any values left as tag or value, it is considered invalid.

	var wg sync.WaitGroup

	// Reset lookup table.
	// Offsets are offset by 1, so 0 indicates an unfilled entry.
	for i := range s.stringsTable[:] {
		s.stringsTable[i] = 0
	}
	if len(s.stringBuf) > 0 {
		s.stringBuf = s.stringBuf[:0]
	}
	if len(s.sMsg) > 0 {
		s.sMsg = s.sMsg[:0]
	}

	// Strings seen by indexString are streamed into the message block encoder.
	msgWr, msgDone := encBlock(s.compStrings, s.sMsg, s.fasterComp)
	s.stringWr = msgWr

	const tagBufSize = 64 << 10
	const valBufSize = 64 << 10

	valWr, valDone := encBlock(s.compValues, s.valuesCompBuf, s.fasterComp)
	tagWr, tagDone := encBlock(s.compTags, s.tagsCompBuf, s.fasterComp)
	// Pessimistically allocate for maximum possible size.
	if cap(s.tagsBuf) <= tagBufSize {
		s.tagsBuf = make([]byte, tagBufSize)
	}
	s.tagsBuf = s.tagsBuf[:tagBufSize]

	// At most one value per 2 tape entries
	if cap(s.valuesBuf) < valBufSize+4 {
		s.valuesBuf = make([]byte, valBufSize+4)
	}

	s.valuesBuf = s.valuesBuf[:0]
	off := 0
	tagsOff := 0
	var tmp [8]byte
	rawValues := 0
	rawTags := 0
	// Walk the tape, splitting tags and values into their own streams,
	// flushing to the block encoders whenever a scratch buffer fills.
	for off < len(pj.Tape) {
		if tagsOff >= tagBufSize {
			rawTags += tagsOff
			tagWr.Write(s.tagsBuf[:tagsOff])
			tagsOff = 0
		}
		if len(s.valuesBuf) >= valBufSize {
			rawValues += len(s.valuesBuf)
			valWr.Write(s.valuesBuf)
			s.valuesBuf = s.valuesBuf[:0]
		}
		entry := pj.Tape[off]
		ntype := Tag(entry >> 56)
		payload := entry & JSONVALUEMASK

		switch ntype {
		case TagString:
			sb, err := pj.stringByteAt(payload, pj.Tape[off+1])
			if err != nil {
				panic(err)
			}
			// Deduplicate: store (offset, length) into the shared string buffer.
			offset := s.indexString(sb)

			binary.LittleEndian.PutUint64(tmp[:], offset)
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			binary.LittleEndian.PutUint64(tmp[:], uint64(len(sb)))
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagUint:
			binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagInteger:
			binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
			off++
		case TagFloat:
			if payload == 0 {
				// Plain float: only the 64-bit value follows.
				binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				off++
			} else {
				// Float with parse flags: store the whole tape entry plus value.
				ntype = tagFloatWithFlag
				binary.LittleEndian.PutUint64(tmp[:], entry)
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				binary.LittleEndian.PutUint64(tmp[:], pj.Tape[off+1])
				s.valuesBuf = append(s.valuesBuf, tmp[:]...)
				off++
			}
		case TagNull, TagBoolTrue, TagBoolFalse:
			// No value.
		case TagObjectStart, TagArrayStart, TagRoot:
			// TagObjectStart TagArrayStart always points forward.
			// TagRoot can point either direction so we rely on under/overflow.
			binary.LittleEndian.PutUint64(tmp[:], payload-uint64(off))
			s.valuesBuf = append(s.valuesBuf, tmp[:]...)
		case TagObjectEnd, TagArrayEnd, TagEnd:
			// Value can be deducted from start tag or no value.
		default:
			wg.Wait()
			panic(fmt.Errorf("unknown tag: %d", int(ntype)))
		}
		s.tagsBuf[tagsOff] = uint8(ntype)
		tagsOff++
		off++
	}
	// Flush any remaining tags and values.
	if tagsOff > 0 {
		rawTags += tagsOff
		tagWr.Write(s.tagsBuf[:tagsOff])
	}
	if len(s.valuesBuf) > 0 {
		rawValues += len(s.valuesBuf)
		valWr.Write(s.valuesBuf)
	}
	// Finish the three block encoders concurrently.
	wg.Add(3)
	go func() {
		var err error
		s.tagsCompBuf, err = tagDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()
	go func() {
		var err error
		s.valuesCompBuf, err = valDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()
	go func() {
		var err error
		s.sMsg, err = msgDone()
		if err != nil {
			panic(err)
		}
		wg.Done()
	}()

	// Wait for compressors
	wg.Wait()

	// Version
	dst = append(dst, serializedVersion)

	// Size of varints...
	// Compute the total encoded size of all the varints that follow so the
	// "compressed size of remaining data" header can be written first.
	varInts := binary.PutUvarint(tmp[:], uint64(0)) +
		binary.PutUvarint(tmp[:], uint64(len(s.sMsg))) +
		binary.PutUvarint(tmp[:], uint64(rawTags)) +
		binary.PutUvarint(tmp[:], uint64(len(s.tagsCompBuf))) +
		binary.PutUvarint(tmp[:], uint64(rawValues)) +
		binary.PutUvarint(tmp[:], uint64(len(s.valuesCompBuf))) +
		binary.PutUvarint(tmp[:], uint64(len(s.stringBuf))) +
		binary.PutUvarint(tmp[:], uint64(len(pj.Tape)))

	n := binary.PutUvarint(tmp[:], uint64(1+len(s.sMsg)+len(s.tagsCompBuf)+len(s.valuesCompBuf)+varInts))
	dst = append(dst, tmp[:n]...)

	// Tape elements, uncompressed.
	n = binary.PutUvarint(tmp[:], uint64(len(pj.Tape)))
	dst = append(dst, tmp[:n]...)

	// Strings uncompressed size
	// (v2 stores no separate strings block; both size and block are zero.)
	dst = append(dst, 0)
	// Strings
	dst = append(dst, 0)

	// Messages uncompressed size
	n = binary.PutUvarint(tmp[:], uint64(len(s.stringBuf)))
	dst = append(dst, tmp[:n]...)
	// Message
	n = binary.PutUvarint(tmp[:], uint64(len(s.sMsg)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.sMsg...)

	// Tags
	n = binary.PutUvarint(tmp[:], uint64(rawTags))
	dst = append(dst, tmp[:n]...)
	n = binary.PutUvarint(tmp[:], uint64(len(s.tagsCompBuf)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.tagsCompBuf...)

	// Values
	n = binary.PutUvarint(tmp[:], uint64(rawValues))
	dst = append(dst, tmp[:n]...)
	n = binary.PutUvarint(tmp[:], uint64(len(s.valuesCompBuf)))
	dst = append(dst, tmp[:n]...)
	dst = append(dst, s.valuesCompBuf...)
	if false {
		// Debug: compression ratio summary (disabled).
		fmt.Println("strings:", len(pj.Strings)+len(pj.Message), "->", len(s.sMsg), "tags:", rawTags, "->", len(s.tagsCompBuf), "values:", rawValues, "->", len(s.valuesCompBuf), "Total:", len(pj.Message)+len(pj.Strings)+len(pj.Tape)*8, "->", len(dst))
	}

	return dst
}

// splitBlocks reads a stream of framed blocks from r and sends each block's
// raw (still compressed) payload on out. out is closed before returning.
// The terminating error is returned (io.EOF on a clean end of stream).
//
// NOTE(review): the version byte is required to be exactly 1, while Serialize
// writes serializedVersion (2) — confirm which framing this reader is meant
// to consume before using it on current streams.
func (s *Serializer) splitBlocks(r io.Reader, out chan []byte) error {
	br := bufio.NewReader(r)
	defer close(out)
	for {
		if v, err := br.ReadByte(); err != nil {
			return err
		} else if v != 1 {
			return errors.New("unknown version")
		}

		// Comp size
		c, err := binary.ReadUvarint(br)
		if err != nil {
			return err
		}
		// Reject blocks that exceed the configured limit.
		if c > s.maxBlockSize {
			return errors.New("compressed block too big")
		}
		block := make([]byte, c)
		n, err := io.ReadFull(br, block)
		if err != nil {
			return err
		}
		if n > 0 {
			out <- block
		}
	}
}

// Deserialize the content in src.
// Only basic sanity checks will be performed.
// Slight corruption will likely go through unnoticed.
// And optional destination can be provided.
463 func (s *Serializer) Deserialize(src []byte, dst *ParsedJson) (*ParsedJson, error) { 464 br := bytes.NewBuffer(src) 465 466 if v, err := br.ReadByte(); err != nil { 467 return dst, err 468 } else if v > serializedVersion { 469 // v2 reads v1. 470 return dst, errors.New("unknown version") 471 } 472 473 if dst == nil { 474 dst = &ParsedJson{} 475 } 476 477 // Comp size 478 if c, err := binary.ReadUvarint(br); err != nil { 479 return dst, err 480 } else { 481 if int(c) > br.Len() { 482 return dst, fmt.Errorf("stream too short, want %d, only have %d left", c, br.Len()) 483 } 484 if int(c) > br.Len() { 485 fmt.Println("extra length:", int(c), br.Len()) 486 } 487 } 488 489 // Tape size 490 if ts, err := binary.ReadUvarint(br); err != nil { 491 return dst, err 492 } else { 493 if uint64(cap(dst.Tape)) < ts { 494 dst.Tape = make([]uint64, ts) 495 } 496 dst.Tape = dst.Tape[:ts] 497 } 498 499 // String size 500 if ss, err := binary.ReadUvarint(br); err != nil { 501 return dst, err 502 } else { 503 if uint64(cap(dst.Strings)) < ss || dst.Strings == nil { 504 dst.Strings = make([]byte, ss) 505 } 506 dst.Strings = dst.Strings[:ss] 507 } 508 509 // Decompress strings 510 var sWG sync.WaitGroup 511 var stringsErr, msgErr error 512 err := s.decBlock(br, dst.Strings, &sWG, &stringsErr) 513 if err != nil { 514 return dst, err 515 } 516 517 // Message size 518 if ss, err := binary.ReadUvarint(br); err != nil { 519 return dst, err 520 } else { 521 if uint64(cap(dst.Message)) < ss || dst.Message == nil { 522 dst.Message = make([]byte, ss) 523 } 524 dst.Message = dst.Message[:ss] 525 } 526 527 // Messages 528 err = s.decBlock(br, dst.Message, &sWG, &msgErr) 529 if err != nil { 530 return dst, err 531 } 532 defer sWG.Wait() 533 534 // Decompress tags 535 if tags, err := binary.ReadUvarint(br); err != nil { 536 return dst, err 537 } else { 538 if uint64(cap(s.tagsBuf)) < tags { 539 s.tagsBuf = make([]byte, tags) 540 } 541 s.tagsBuf = s.tagsBuf[:tags] 542 } 543 544 var wg 
sync.WaitGroup 545 var tagsErr error 546 err = s.decBlock(br, s.tagsBuf, &wg, &tagsErr) 547 if err != nil { 548 return dst, fmt.Errorf("decompressing tags: %w", err) 549 } 550 defer wg.Wait() 551 552 // Decompress values 553 if vals, err := binary.ReadUvarint(br); err != nil { 554 return dst, err 555 } else { 556 if uint64(cap(s.valuesBuf)) < vals { 557 s.valuesBuf = make([]byte, vals) 558 } 559 s.valuesBuf = s.valuesBuf[:vals] 560 } 561 562 var valsErr error 563 err = s.decBlock(br, s.valuesBuf, &wg, &valsErr) 564 if err != nil { 565 return dst, fmt.Errorf("decompressing values: %w", err) 566 } 567 568 // Wait until we have what we need for the tape. 569 wg.Wait() 570 switch { 571 case tagsErr != nil: 572 return dst, fmt.Errorf("decompressing tags: %w", tagsErr) 573 case valsErr != nil: 574 return dst, fmt.Errorf("decompressing values: %w", valsErr) 575 } 576 577 // Reconstruct tape: 578 var off int 579 values := s.valuesBuf 580 for _, t := range s.tagsBuf { 581 if off == len(dst.Tape) { 582 return dst, errors.New("tags extended beyond tape") 583 } 584 tag := Tag(t) 585 586 tagDst := uint64(t) << 56 587 switch tag { 588 case TagString: 589 if len(values) < 16 { 590 return dst, fmt.Errorf("reading %v: no values left", tag) 591 } 592 sOffset := binary.LittleEndian.Uint64(values[:8]) 593 sLen := binary.LittleEndian.Uint64(values[8:16]) 594 values = values[16:] 595 596 dst.Tape[off] = tagDst | sOffset 597 dst.Tape[off+1] = sLen 598 off += 2 599 case TagFloat, TagInteger, TagUint: 600 if len(values) < 8 { 601 return dst, fmt.Errorf("reading %v: no values left", tag) 602 } 603 dst.Tape[off] = tagDst 604 dst.Tape[off+1] = binary.LittleEndian.Uint64(values[:8]) 605 values = values[8:] 606 off += 2 607 case tagFloatWithFlag: 608 // Tape contains full value 609 if len(values) < 16 { 610 return dst, fmt.Errorf("reading %v: no values left", tag) 611 } 612 dst.Tape[off] = binary.LittleEndian.Uint64(values[:8]) 613 dst.Tape[off+1] = binary.LittleEndian.Uint64(values[8:16]) 614 
values = values[16:] 615 off += 2 616 case TagNull, TagBoolTrue, TagBoolFalse, TagEnd: 617 dst.Tape[off] = tagDst 618 off++ 619 case TagObjectStart, TagArrayStart: 620 if len(values) < 8 { 621 return dst, fmt.Errorf("reading %v: no values left", tag) 622 } 623 // Always forward 624 val := binary.LittleEndian.Uint64(values[:8]) 625 values = values[8:] 626 val += uint64(off) 627 if val > uint64(len(dst.Tape)) { 628 return dst, fmt.Errorf("%v extends beyond tape (%d). offset:%d", tag, len(dst.Tape), val) 629 } 630 631 dst.Tape[off] = tagDst | val 632 // Write closing... 633 dst.Tape[val-1] = uint64(tagOpenToClose[tag])<<56 | uint64(off) 634 635 off++ 636 case TagRoot: 637 if len(values) < 8 { 638 return dst, fmt.Errorf("reading %v: no values left", tag) 639 } 640 // Always forward 641 val := binary.LittleEndian.Uint64(values[:8]) 642 values = values[8:] 643 val += uint64(off) 644 if val > uint64(len(dst.Tape)) { 645 return dst, fmt.Errorf("%v extends beyond tape (%d). offset:%d", tag, len(dst.Tape), val) 646 } 647 648 dst.Tape[off] = tagDst | val 649 650 off++ 651 case TagObjectEnd, TagArrayEnd: 652 // This should already have been written. 
653 if dst.Tape[off]&JSONTAGMASK != tagDst { 654 return dst, fmt.Errorf("reading %v, offset:%d, start tag did not match %x != %x", tag, off, dst.Tape[off]>>56, uint8(tag)) 655 } 656 off++ 657 default: 658 return nil, fmt.Errorf("unknown tag: %v", tag) 659 } 660 } 661 sWG.Wait() 662 if off != len(dst.Tape) { 663 return dst, fmt.Errorf("tags did not fill tape, want %d, got %d", len(dst.Tape), off) 664 } 665 if len(values) > 0 { 666 return dst, fmt.Errorf("values did not fill tape, want %d, got %d", len(dst.Tape), off) 667 } 668 if stringsErr != nil { 669 return dst, fmt.Errorf("reading strings: %w", stringsErr) 670 } 671 return dst, nil 672 } 673 674 func (s *Serializer) decBlock(br *bytes.Buffer, dst []byte, wg *sync.WaitGroup, dstErr *error) error { 675 size, err := binary.ReadUvarint(br) 676 if err != nil { 677 return err 678 } 679 if size > uint64(br.Len()) { 680 return fmt.Errorf("block size (%d) extends beyond input %d", size, br.Len()) 681 } 682 if size == 0 && len(dst) == 0 { 683 // Nothing, no compress type 684 return nil 685 } 686 if size < 1 { 687 return fmt.Errorf("block size (%d) too small %d", size, br.Len()) 688 } 689 690 typ, err := br.ReadByte() 691 if err != nil { 692 return err 693 } 694 size-- 695 compressed := br.Next(int(size)) 696 if len(compressed) != int(size) { 697 return errors.New("short block section") 698 } 699 switch typ { 700 case blockTypeUncompressed: 701 // uncompressed 702 if len(compressed) != len(dst) { 703 panic("err") 704 return fmt.Errorf("short uncompressed block: in (%d) != out (%d)", len(compressed), len(dst)) 705 } 706 copy(dst, compressed) 707 case blockTypeS2: 708 wg.Add(1) 709 go func() { 710 defer wg.Done() 711 buf := bytes.NewBuffer(compressed) 712 dec := s2Readers.Get().(*s2.Reader) 713 dec.Reset(buf) 714 _, err := io.ReadFull(dec, dst) 715 dec.Reset(nil) 716 s2Readers.Put(dec) 717 *dstErr = err 718 }() 719 case blockTypeZstd: 720 wg.Add(1) 721 go func() { 722 defer wg.Done() 723 want := len(dst) 724 dst, err = 
zDec.DecodeAll(compressed, dst[:0]) 725 if err == nil && want != len(dst) { 726 err = errors.New("zstd decompressed size mismatch") 727 } 728 *dstErr = err 729 }() 730 default: 731 return fmt.Errorf("unknown compression type: %d", typ) 732 } 733 return nil 734 } 735 736 const ( 737 blockTypeUncompressed byte = 0 738 blockTypeS2 byte = 1 739 blockTypeZstd byte = 2 740 ) 741 742 var zDec *zstd.Decoder 743 744 var zEncFast = sync.Pool{New: func() interface{} { 745 e, _ := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest), zstd.WithEncoderCRC(false)) 746 return e 747 }} 748 749 var s2FastWriters = sync.Pool{New: func() interface{} { 750 return s2.NewWriter(nil) 751 }} 752 753 var s2Writers = sync.Pool{New: func() interface{} { 754 return s2.NewWriter(nil, s2.WriterBetterCompression()) 755 }} 756 var s2Readers = sync.Pool{New: func() interface{} { 757 return s2.NewReader(nil) 758 }} 759 760 var initSerializerOnce sync.Once 761 762 func initSerializer() { 763 zDec, _ = zstd.NewReader(nil) 764 } 765 766 type encodedResult func() ([]byte, error) 767 768 // encBlock will encode a block of data. 
769 func encBlock(mode byte, buf []byte, fast bool) (io.Writer, encodedResult) { 770 dst := bytes.NewBuffer(buf[:0]) 771 dst.WriteByte(mode) 772 switch mode { 773 case blockTypeUncompressed: 774 return dst, func() ([]byte, error) { 775 return dst.Bytes(), nil 776 } 777 case blockTypeS2: 778 var enc *s2.Writer 779 var put *sync.Pool 780 if fast { 781 enc = s2FastWriters.Get().(*s2.Writer) 782 put = &s2FastWriters 783 } else { 784 enc = s2Writers.Get().(*s2.Writer) 785 put = &s2Writers 786 } 787 enc.Reset(dst) 788 return enc, func() (i []byte, err error) { 789 err = enc.Close() 790 if err != nil { 791 return nil, err 792 } 793 enc.Reset(nil) 794 put.Put(enc) 795 return dst.Bytes(), nil 796 } 797 case blockTypeZstd: 798 enc := zEncFast.Get().(*zstd.Encoder) 799 enc.Reset(dst) 800 return enc, func() (i []byte, err error) { 801 err = enc.Close() 802 if err != nil { 803 return nil, err 804 } 805 enc.Reset(nil) 806 zEncFast.Put(enc) 807 return dst.Bytes(), nil 808 } 809 } 810 panic("unknown compression mode") 811 } 812 813 // indexString will deduplicate strings and populate 814 func (s *Serializer) indexString(sb []byte) (offset uint64) { 815 // Only possible on 64 bit platforms, so it will never trigger on 32 bit platforms. 816 if uint32(len(sb)) >= math.MaxUint32 { 817 panic("string too long") 818 } 819 820 h := memHash(sb) & stringmask 821 off := int(s.stringsTable[h]) - 1 822 end := off + len(sb) 823 if off >= 0 && end <= len(s.stringBuf) { 824 found := s.stringBuf[off:end] 825 if bytes.Equal(found, sb) { 826 return uint64(off) 827 } 828 // It didn't match :( 829 } 830 off = len(s.stringBuf) 831 s.stringBuf = append(s.stringBuf, sb...) 
832 s.stringsTable[h] = uint32(off + 1) 833 s.stringWr.Write(sb) 834 return uint64(off) 835 } 836 837 //go:noescape 838 //go:linkname memhash runtime.memhash 839 func memhash(p unsafe.Pointer, h, s uintptr) uintptr 840 841 // memHash is the hash function used by go map, it utilizes available hardware instructions (behaves 842 // as aeshash if aes instruction is available). 843 // NOTE: The hash seed changes for every process. So, this cannot be used as a persistent hash. 844 func memHash(data []byte) uint64 { 845 ss := (*stringStruct)(unsafe.Pointer(&data)) 846 return uint64(memhash(ss.str, 0, uintptr(ss.len))) 847 } 848 849 type stringStruct struct { 850 str unsafe.Pointer 851 len int 852 }