github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/parsed_json.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package simdjson 18 19 import ( 20 "errors" 21 "fmt" 22 "math" 23 "strconv" 24 ) 25 26 const JSONVALUEMASK = 0xff_ffff_ffff_ffff 27 const JSONTAGOFFSET = 56 28 const JSONTAGMASK = 0xff << JSONTAGOFFSET 29 const STRINGBUFBIT = 0x80_0000_0000_0000 30 const STRINGBUFMASK = 0x7fffffffffffff 31 32 const maxdepth = 128 33 34 // FloatFlags are flags recorded when converting floats. 35 type FloatFlags uint64 36 37 // FloatFlag is a flag recorded when parsing floats. 38 type FloatFlag uint64 39 40 const ( 41 // FloatOverflowedInteger is set when number in JSON was in integer notation, 42 // but under/overflowed both int64 and uint64 and therefore was parsed as float. 43 FloatOverflowedInteger FloatFlag = 1 << iota 44 ) 45 46 // Contains returns whether f contains the specified flag. 47 func (f FloatFlags) Contains(flag FloatFlag) bool { 48 return FloatFlag(f)&flag == flag 49 } 50 51 // Flags converts the flag to FloatFlags and optionally merges more flags. 52 func (f FloatFlag) Flags(more ...FloatFlag) FloatFlags { 53 // We operate on a copy, so we can modify f. 54 for _, v := range more { 55 f |= v 56 } 57 return FloatFlags(f) 58 } 59 60 type TStrings struct { 61 B []byte 62 } 63 64 type ParsedJson struct { 65 Message []byte 66 Tape []uint64 67 Strings *TStrings 68 69 // allows to reuse the internal structures without exposing it. 70 internal *internalParsedJson 71 } 72 73 const indexSlots = 16 74 const indexSize = 1536 // Seems to be a good size for the index buffering 75 const indexSizeWithSafetyBuffer = indexSize - 128 // Make sure we never write beyond buffer 76 77 type indexChan struct { 78 index int 79 length int 80 indexes *[indexSize]uint32 81 } 82 83 type internalParsedJson struct { 84 ParsedJson 85 containingScopeOffset []uint64 86 isvalid bool 87 indexChans chan indexChan 88 indexesChan indexChan 89 buffers [indexSlots][indexSize]uint32 90 buffersOffset uint64 91 ndjson uint64 92 copyStrings bool 93 } 94 95 // Iter returns a new Iter. 96 func (pj *ParsedJson) Iter() Iter { 97 return Iter{tape: *pj} 98 } 99 100 // stringAt returns a string at a specific offset in the stringbuffer. 101 func (pj *ParsedJson) stringAt(offset, length uint64) (string, error) { 102 b, err := pj.stringByteAt(offset, length) 103 return string(b), err 104 } 105 106 // stringByteAt returns a string at a specific offset in the stringbuffer. 107 func (pj *ParsedJson) stringByteAt(offset, length uint64) ([]byte, error) { 108 if offset&STRINGBUFBIT == 0 { 109 if offset+length > uint64(len(pj.Message)) { 110 return nil, fmt.Errorf("string message offset (%v) outside valid area (%v)", offset+length, len(pj.Message)) 111 } 112 return pj.Message[offset : offset+length], nil 113 } 114 115 offset = offset & STRINGBUFMASK 116 if offset+length > uint64(len(pj.Strings.B)) { 117 return nil, fmt.Errorf("string buffer offset (%v) outside valid area (%v)", offset+length, len(pj.Strings.B)) 118 } 119 return pj.Strings.B[offset : offset+length], nil 120 } 121 122 // ForEach returns each line in NDJSON, or the top element in non-ndjson. 123 // This will usually be an object or an array. 124 // If the callback returns a non-nil error parsing stops and the errors is returned. 125 func (pj *ParsedJson) ForEach(fn func(i Iter) error) error { 126 i := Iter{tape: *pj} 127 var elem Iter 128 for { 129 t, err := i.AdvanceIter(&elem) 130 if err != nil || t != TypeRoot { 131 return err 132 } 133 elem.AdvanceInto() 134 if err = fn(elem); err != nil { 135 return err 136 } 137 } 138 } 139 140 // Clone returns a deep clone of the ParsedJson. 141 // If a nil destination is sent a new will be created. 142 func (pj *ParsedJson) Clone(dst *ParsedJson) *ParsedJson { 143 if dst == nil { 144 dst = &ParsedJson{ 145 Message: make([]byte, len(pj.Message)), 146 Tape: make([]uint64, len(pj.Tape)), 147 Strings: &TStrings{make([]byte, len(pj.Strings.B))}, 148 internal: nil, 149 } 150 } else { 151 if cap(dst.Message) < len(pj.Message) { 152 dst.Message = make([]byte, len(pj.Message)) 153 } 154 if cap(dst.Tape) < len(pj.Tape) { 155 dst.Tape = make([]uint64, len(pj.Tape)) 156 } 157 if dst.Strings == nil { 158 dst.Strings = &TStrings{make([]byte, len(pj.Strings.B))} 159 } else if cap(dst.Strings.B) < len(pj.Strings.B) { 160 dst.Strings.B = make([]byte, len(pj.Strings.B)) 161 } 162 } 163 dst.internal = nil 164 dst.Tape = dst.Tape[:len(pj.Tape)] 165 copy(dst.Tape, pj.Tape) 166 dst.Message = dst.Message[:len(pj.Message)] 167 copy(dst.Message, pj.Message) 168 dst.Strings.B = dst.Strings.B[:len(pj.Strings.B)] 169 copy(dst.Strings.B, pj.Strings.B) 170 return dst 171 } 172 173 // Iter represents a section of JSON. 174 // To start iterating it, use Advance() or AdvanceIter() methods 175 // which will queue the first element. 176 // If an Iter is copied, the copy will be independent. 177 type Iter struct { 178 // The tape where this iter start. 179 tape ParsedJson 180 181 // offset of the next entry to be decoded 182 off int 183 184 // addNext is the number of entries to skip for the next entry. 185 addNext int 186 187 // current value, exclude tag in top bits 188 cur uint64 189 190 // current tag 191 t Tag 192 } 193 194 // Advance will read the type of the next element 195 // and queues up the value on the same level. 196 func (i *Iter) Advance() Type { 197 i.off += i.addNext 198 199 for { 200 if i.off >= len(i.tape.Tape) { 201 i.addNext = 0 202 i.t = TagEnd 203 return TypeNone 204 } 205 206 v := i.tape.Tape[i.off] 207 i.t = Tag(v >> 56) 208 i.off++ 209 i.cur = v & JSONVALUEMASK 210 if i.t == TagNop { 211 i.off += int(i.cur) 212 continue 213 } 214 break 215 } 216 i.calcNext(false) 217 if i.addNext < 0 { 218 // We can't send error, so move to end. 219 i.moveToEnd() 220 return TypeNone 221 } 222 return TagToType[i.t] 223 } 224 225 // AdvanceInto will read the tag of the next element 226 // and move into and out of arrays , objects and root elements. 227 // This should only be used for strictly manual parsing. 228 func (i *Iter) AdvanceInto() Tag { 229 i.off += i.addNext 230 for { 231 if i.off >= len(i.tape.Tape) { 232 i.addNext = 0 233 i.t = TagEnd 234 return TagEnd 235 } 236 237 v := i.tape.Tape[i.off] 238 i.t = Tag(v >> 56) 239 i.cur = v & JSONVALUEMASK 240 if i.t == TagNop { 241 if i.cur <= 0 { 242 i.moveToEnd() 243 return TagEnd 244 } 245 i.off += int(i.cur) 246 continue 247 } 248 i.off++ 249 break 250 } 251 i.calcNext(true) 252 if i.addNext < 0 { 253 // We can't send error, so end tape. 254 i.moveToEnd() 255 return TagEnd 256 } 257 return i.t 258 } 259 260 func (i *Iter) moveToEnd() { 261 i.off = len(i.tape.Tape) 262 i.addNext = 0 263 i.t = TagEnd 264 } 265 266 // calcNext will populate addNext to the correct value to skip. 267 // Specify whether to move into objects/array. 268 func (i *Iter) calcNext(into bool) { 269 i.addNext = 0 270 switch i.t { 271 case TagInteger, TagUint, TagFloat, TagString: 272 i.addNext = 1 273 case TagRoot, TagObjectStart, TagArrayStart: 274 if !into { 275 i.addNext = int(i.cur) - i.off 276 } 277 } 278 } 279 280 // Type returns the queued value type from the previous call to Advance. 281 func (i *Iter) Type() Type { 282 if i.off+i.addNext > len(i.tape.Tape) { 283 return TypeNone 284 } 285 return TagToType[i.t] 286 } 287 288 // AdvanceIter will read the type of the next element 289 // and return an iterator only containing the object. 290 // If dst and i are the same, both will contain the value inside. 291 func (i *Iter) AdvanceIter(dst *Iter) (Type, error) { 292 i.off += i.addNext 293 294 // Get current value off tape. 295 for { 296 if i.off == len(i.tape.Tape) { 297 i.addNext = 0 298 i.t = TagEnd 299 return TypeNone, nil 300 } 301 if i.off > len(i.tape.Tape) { 302 return TypeNone, errors.New("offset bigger than tape") 303 } 304 305 v := i.tape.Tape[i.off] 306 i.cur = v & JSONVALUEMASK 307 i.t = Tag(v >> 56) 308 i.off++ 309 if i.t == TagNop { 310 if i.cur <= 0 { 311 return TypeNone, errors.New("invalid nop skip") 312 } 313 i.off += int(i.cur) 314 continue 315 } 316 break 317 } 318 i.calcNext(false) 319 if i.addNext < 0 { 320 i.moveToEnd() 321 return TypeNone, errors.New("element has negative offset") 322 } 323 324 // Calculate end of this object. 325 iEnd := i.off + i.addNext 326 typ := TagToType[i.t] 327 328 // Copy i if different 329 if i != dst { 330 *dst = *i 331 } 332 // Move into dst 333 dst.calcNext(true) 334 if dst.addNext < 0 { 335 i.moveToEnd() 336 return TypeNone, errors.New("element has negative offset") 337 } 338 339 if iEnd > len(dst.tape.Tape) { 340 return TypeNone, errors.New("element extends beyond tape") 341 } 342 343 // Restrict destination. 344 dst.tape.Tape = dst.tape.Tape[:iEnd] 345 346 return typ, nil 347 } 348 349 // PeekNext will return the next value type. 350 // Returns TypeNone if next ends iterator. 351 func (i *Iter) PeekNext() Type { 352 off := i.off + i.addNext 353 for { 354 if off >= len(i.tape.Tape) { 355 return TypeNone 356 } 357 v := i.tape.Tape[off] 358 t := Tag(v >> 56) 359 if t == TagNop { 360 skip := int(v & JSONVALUEMASK) 361 if skip <= 0 { 362 return TypeNone 363 } 364 off += skip 365 continue 366 } 367 return TagToType[t] 368 } 369 } 370 371 // PeekNextTag will return the tag at the current offset. 372 // Will return TagEnd if at end of iterator. 373 func (i *Iter) PeekNextTag() Tag { 374 off := i.off + i.addNext 375 for { 376 if off >= len(i.tape.Tape) { 377 return TagEnd 378 } 379 v := i.tape.Tape[off] 380 t := Tag(v >> 56) 381 if t == TagNop { 382 skip := int(v & JSONVALUEMASK) 383 if skip <= 0 { 384 return TagEnd 385 } 386 off += skip 387 continue 388 } 389 return t 390 } 391 } 392 393 // MarshalJSON will marshal the entire remaining scope of the iterator. 394 func (i *Iter) MarshalJSON() ([]byte, error) { 395 return i.MarshalJSONBuffer(nil) 396 } 397 398 // MarshalJSONBuffer will marshal the remaining scope of the iterator including the current value. 399 // An optional buffer can be provided for fewer allocations. 400 // Output will be appended to the destination. 401 func (i *Iter) MarshalJSONBuffer(dst []byte) ([]byte, error) { 402 var tmpBuf []byte 403 404 // Pre-allocate for 100 deep. 405 var stackTmp [100]uint8 406 // We have a stackNone on top of the stack 407 stack := stackTmp[:1] 408 const ( 409 stackNone = iota 410 stackArray 411 stackObject 412 stackRoot 413 ) 414 415 writeloop: 416 for { 417 // Write key names. 418 if stack[len(stack)-1] == stackObject && i.t != TagObjectEnd { 419 sb, err := i.StringBytes() 420 if err != nil { 421 return nil, fmt.Errorf("expected key within object: %w", err) 422 } 423 dst = append(dst, '"') 424 dst = escapeBytes(dst, sb) 425 dst = append(dst, '"', ':') 426 if i.PeekNextTag() == TagEnd { 427 return nil, fmt.Errorf("unexpected end of tape within object") 428 } 429 i.AdvanceInto() 430 } 431 //fmt.Println(i.t, len(stack)-1, i.off) 432 tagswitch: 433 switch i.t { 434 case TagRoot: 435 isOpenRoot := int(i.cur) > i.off 436 if len(stack) > 1 { 437 if isOpenRoot { 438 return dst, errors.New("root tag open, but not at top of stack") 439 } 440 l := stack[len(stack)-1] 441 switch l { 442 case stackRoot: 443 if i.PeekNextTag() != TagEnd { 444 dst = append(dst, '\n') 445 } 446 stack = stack[:len(stack)-1] 447 break tagswitch 448 case stackNone: 449 break writeloop 450 default: 451 return dst, errors.New("root tag, but not at top of stack, got id " + strconv.Itoa(int(l))) 452 } 453 } 454 455 if isOpenRoot { 456 // Always move into root. 457 i.addNext = 0 458 } 459 i.AdvanceInto() 460 stack = append(stack, stackRoot) 461 continue 462 case TagString: 463 sb, err := i.StringBytes() 464 if err != nil { 465 return nil, err 466 } 467 dst = append(dst, '"') 468 dst = escapeBytes(dst, sb) 469 dst = append(dst, '"') 470 tmpBuf = tmpBuf[:0] 471 case TagInteger: 472 v, err := i.Int() 473 if err != nil { 474 return nil, err 475 } 476 dst = strconv.AppendInt(dst, v, 10) 477 case TagUint: 478 v, err := i.Uint() 479 if err != nil { 480 return nil, err 481 } 482 dst = strconv.AppendUint(dst, v, 10) 483 case TagFloat: 484 v, err := i.Float() 485 if err != nil { 486 return nil, err 487 } 488 dst, err = appendFloat(dst, v) 489 if err != nil { 490 return nil, err 491 } 492 case TagNull: 493 dst = append(dst, []byte("null")...) 494 case TagBoolTrue: 495 dst = append(dst, []byte("true")...) 496 case TagBoolFalse: 497 dst = append(dst, []byte("false")...) 498 case TagObjectStart: 499 dst = append(dst, '{') 500 stack = append(stack, stackObject) 501 // We should not emit commas. 502 i.AdvanceInto() 503 continue 504 case TagObjectEnd: 505 dst = append(dst, '}') 506 if stack[len(stack)-1] != stackObject { 507 return dst, errors.New("end of object with no object on stack") 508 } 509 stack = stack[:len(stack)-1] 510 case TagArrayStart: 511 dst = append(dst, '[') 512 stack = append(stack, stackArray) 513 i.AdvanceInto() 514 continue 515 case TagArrayEnd: 516 dst = append(dst, ']') 517 if stack[len(stack)-1] != stackArray { 518 return nil, errors.New("end of array with no array on stack") 519 } 520 stack = stack[:len(stack)-1] 521 case TagEnd: 522 if i.PeekNextTag() == TagEnd { 523 return nil, errors.New("no content queued in iterator") 524 } 525 i.AdvanceInto() 526 continue 527 } 528 529 if i.PeekNextTag() == TagEnd { 530 break 531 } 532 i.AdvanceInto() 533 534 // Output object separators, etc. 535 switch stack[len(stack)-1] { 536 case stackArray: 537 switch i.t { 538 case TagArrayEnd: 539 default: 540 dst = append(dst, ',') 541 } 542 case stackObject: 543 switch i.t { 544 case TagObjectEnd: 545 default: 546 dst = append(dst, ',') 547 } 548 } 549 } 550 if len(stack) > 1 { 551 // Copy so "stack" doesn't escape. 552 sCopy := append(make([]uint8, 0, len(stack)-1), stack[1:]...) 553 return nil, fmt.Errorf("objects or arrays not closed. left on stack: %v", sCopy) 554 } 555 return dst, nil 556 } 557 558 // Float returns the float value of the next element. 559 // Integers are automatically converted to float. 560 func (i *Iter) Float() (float64, error) { 561 switch i.t { 562 case TagFloat: 563 if i.off >= len(i.tape.Tape) { 564 return 0, errors.New("corrupt input: expected float, but no more values on tape") 565 } 566 v := math.Float64frombits(i.tape.Tape[i.off]) 567 return v, nil 568 case TagInteger: 569 if i.off >= len(i.tape.Tape) { 570 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 571 } 572 v := int64(i.tape.Tape[i.off]) 573 return float64(v), nil 574 case TagUint: 575 if i.off >= len(i.tape.Tape) { 576 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 577 } 578 v := i.tape.Tape[i.off] 579 return float64(v), nil 580 default: 581 return 0, fmt.Errorf("unable to convert type %v to float", i.t) 582 } 583 } 584 585 // FloatFlags returns the float value of the next element. 586 // This will include flags from parsing. 587 // Integers are automatically converted to float. 588 func (i *Iter) FloatFlags() (float64, FloatFlags, error) { 589 switch i.t { 590 case TagFloat: 591 if i.off >= len(i.tape.Tape) { 592 return 0, 0, errors.New("corrupt input: expected float, but no more values on tape") 593 } 594 v := math.Float64frombits(i.tape.Tape[i.off]) 595 return v, FloatFlags(i.cur), nil 596 case TagInteger: 597 if i.off >= len(i.tape.Tape) { 598 return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape") 599 } 600 v := int64(i.tape.Tape[i.off]) 601 return float64(v), 0, nil 602 case TagUint: 603 if i.off >= len(i.tape.Tape) { 604 return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape") 605 } 606 v := i.tape.Tape[i.off] 607 return float64(v), 0, nil 608 default: 609 return 0, 0, fmt.Errorf("unable to convert type %v to float", i.t) 610 } 611 } 612 613 // SetFloat can change a float, int, uint or string with the specified value. 614 // Attempting to change other types will return an error. 615 func (i *Iter) SetFloat(v float64) error { 616 switch i.t { 617 case TagFloat, TagInteger, TagUint, TagString: 618 i.tape.Tape[i.off-1] = uint64(TagFloat) << JSONTAGOFFSET 619 i.tape.Tape[i.off] = math.Float64bits(v) 620 i.t = TagFloat 621 i.cur = 0 622 return nil 623 } 624 return fmt.Errorf("cannot set tag %s to float", i.t.String()) 625 } 626 627 // Int returns the integer value of the next element. 628 // Integers and floats within range are automatically converted. 629 func (i *Iter) Int() (int64, error) { 630 switch i.t { 631 case TagFloat: 632 if i.off >= len(i.tape.Tape) { 633 return 0, errors.New("corrupt input: expected float, but no more values on tape") 634 } 635 v := math.Float64frombits(i.tape.Tape[i.off]) 636 if v > math.MaxInt64 { 637 return 0, errors.New("float value overflows int64") 638 } 639 if v < math.MinInt64 { 640 return 0, errors.New("float value underflows int64") 641 } 642 return int64(v), nil 643 case TagInteger: 644 if i.off >= len(i.tape.Tape) { 645 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 646 } 647 v := int64(i.tape.Tape[i.off]) 648 return v, nil 649 case TagUint: 650 if i.off >= len(i.tape.Tape) { 651 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 652 } 653 v := i.tape.Tape[i.off] 654 if v > math.MaxInt64 { 655 return 0, errors.New("unsigned integer value overflows int64") 656 } 657 return int64(v), nil 658 default: 659 return 0, fmt.Errorf("unable to convert type %v to int", i.t) 660 } 661 } 662 663 // SetInt can change a float, int, uint or string with the specified value. 664 // Attempting to change other types will return an error. 665 func (i *Iter) SetInt(v int64) error { 666 switch i.t { 667 case TagFloat, TagInteger, TagUint, TagString: 668 i.tape.Tape[i.off-1] = uint64(TagInteger) << JSONTAGOFFSET 669 i.tape.Tape[i.off] = uint64(v) 670 i.t = TagInteger 671 i.cur = uint64(v) 672 return nil 673 } 674 return fmt.Errorf("cannot set tag %s to int", i.t.String()) 675 } 676 677 // Uint returns the unsigned integer value of the next element. 678 // Positive integers and floats within range are automatically converted. 679 func (i *Iter) Uint() (uint64, error) { 680 switch i.t { 681 case TagFloat: 682 if i.off >= len(i.tape.Tape) { 683 return 0, errors.New("corrupt input: expected float, but no more values on tape") 684 } 685 v := math.Float64frombits(i.tape.Tape[i.off]) 686 if v > math.MaxUint64 { 687 return 0, errors.New("float value overflows uint64") 688 } 689 if v < 0 { 690 return 0, errors.New("float value is negative. cannot convert to uint") 691 } 692 return uint64(v), nil 693 case TagInteger: 694 if i.off >= len(i.tape.Tape) { 695 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 696 } 697 v := int64(i.tape.Tape[i.off]) 698 if v < 0 { 699 return 0, errors.New("integer value is negative. cannot convert to uint") 700 } 701 702 return uint64(v), nil 703 case TagUint: 704 if i.off >= len(i.tape.Tape) { 705 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 706 } 707 v := i.tape.Tape[i.off] 708 return v, nil 709 default: 710 return 0, fmt.Errorf("unable to convert type %v to uint", i.t) 711 } 712 } 713 714 // SetUInt can change a float, int, uint or string with the specified value. 715 // Attempting to change other types will return an error. 716 func (i *Iter) SetUInt(v uint64) error { 717 switch i.t { 718 case TagString, TagFloat, TagInteger, TagUint: 719 i.tape.Tape[i.off-1] = uint64(TagUint) << JSONTAGOFFSET 720 i.tape.Tape[i.off] = v 721 i.t = TagUint 722 i.cur = v 723 return nil 724 } 725 return fmt.Errorf("cannot set tag %s to uint", i.t.String()) 726 } 727 728 // String() returns a string value. 729 func (i *Iter) String() (string, error) { 730 if i.t != TagString { 731 return "", errors.New("value is not string") 732 } 733 if i.off >= len(i.tape.Tape) { 734 return "", errors.New("corrupt input: no string offset") 735 } 736 737 return i.tape.stringAt(i.cur, i.tape.Tape[i.off]) 738 } 739 740 // StringBytes returns a string as byte array. 741 func (i *Iter) StringBytes() ([]byte, error) { 742 if i.t != TagString { 743 return nil, errors.New("value is not string") 744 } 745 if i.off >= len(i.tape.Tape) { 746 return nil, errors.New("corrupt input: no string offset on tape") 747 } 748 return i.tape.stringByteAt(i.cur, i.tape.Tape[i.off]) 749 } 750 751 // SetString can change a string, int, uint or float with the specified string. 752 // Attempting to change other types will return an error. 753 func (i *Iter) SetString(v string) error { 754 return i.SetStringBytes([]byte(v)) 755 } 756 757 // SetStringBytes can change a string, int, uint or float with the specified string. 758 // Attempting to change other types will return an error. 759 // Sending nil will add an empty string. 760 func (i *Iter) SetStringBytes(v []byte) error { 761 switch i.t { 762 case TagString, TagFloat, TagInteger, TagUint: 763 i.cur = ((uint64(TagString) << JSONTAGOFFSET) | STRINGBUFBIT) | uint64(len(i.tape.Strings.B)) 764 i.tape.Tape[i.off-1] = i.cur 765 i.tape.Tape[i.off] = uint64(len(v)) 766 i.t = TagString 767 i.tape.Strings.B = append(i.tape.Strings.B, v...) 768 return nil 769 } 770 return fmt.Errorf("cannot set tag %s to string", i.t.String()) 771 } 772 773 // StringCvt returns a string representation of the value. 774 // Root, Object and Arrays are not supported. 775 func (i *Iter) StringCvt() (string, error) { 776 switch i.t { 777 case TagString: 778 return i.String() 779 case TagInteger: 780 v, err := i.Int() 781 return strconv.FormatInt(v, 10), err 782 case TagUint: 783 v, err := i.Uint() 784 return strconv.FormatUint(v, 10), err 785 case TagFloat: 786 v, err := i.Float() 787 if err != nil { 788 return "", err 789 } 790 return floatToString(v) 791 case TagBoolFalse: 792 return "false", nil 793 case TagBoolTrue: 794 return "true", nil 795 case TagNull: 796 return "null", nil 797 } 798 return "", fmt.Errorf("cannot convert type %s to string", TagToType[i.t]) 799 } 800 801 // Root returns the object embedded in root as an iterator 802 // along with the type of the content of the first element of the iterator. 803 // An optional destination can be supplied to avoid allocations. 804 func (i *Iter) Root(dst *Iter) (Type, *Iter, error) { 805 if i.t != TagRoot { 806 return TypeNone, dst, errors.New("value is not root") 807 } 808 if i.cur > uint64(len(i.tape.Tape)) { 809 return TypeNone, dst, errors.New("root element extends beyond tape") 810 } 811 if dst == nil { 812 c := *i 813 dst = &c 814 } else { 815 dst.cur = i.cur 816 dst.off = i.off 817 dst.t = i.t 818 dst.tape.Strings = i.tape.Strings 819 dst.tape.Message = i.tape.Message 820 } 821 dst.addNext = 0 822 dst.tape.Tape = i.tape.Tape[:i.cur-1] 823 return dst.AdvanceInto().Type(), dst, nil 824 } 825 826 // FindElement allows searching for fields and objects by path from the iter and forward, 827 // moving into root and objects, but not arrays. 828 // For example "Image", "Url" will search the current root/object for an "Image" 829 // object and return the value of the "Url" element. 830 // ErrPathNotFound is returned if any part of the path cannot be found. 831 // If the tape contains an error it will be returned. 832 // The iter will *not* be advanced. 833 func (i *Iter) FindElement(dst *Element, path ...string) (*Element, error) { 834 if len(path) == 0 { 835 return dst, ErrPathNotFound 836 } 837 // Local copy. 838 cp := *i 839 for { 840 switch cp.t { 841 case TagObjectStart: 842 var o Object 843 obj, err := cp.Object(&o) 844 if err != nil { 845 return dst, err 846 } 847 return obj.FindPath(dst, path...) 848 case TagRoot: 849 _, _, err := cp.Root(&cp) 850 if err != nil { 851 return dst, err 852 } 853 continue 854 case TagEnd: 855 tag := cp.AdvanceInto() 856 if tag == TagEnd { 857 return dst, ErrPathNotFound 858 } 859 continue 860 default: 861 return dst, fmt.Errorf("type %q found before object was found", cp.t) 862 } 863 } 864 } 865 866 // Bool returns the bool value. 867 func (i *Iter) Bool() (bool, error) { 868 switch i.t { 869 case TagBoolTrue: 870 return true, nil 871 case TagBoolFalse: 872 return false, nil 873 } 874 return false, fmt.Errorf("value is not bool, but %v", i.t) 875 } 876 877 // SetBool can change a bool or null type to bool with the specified value. 878 // Attempting to change other types will return an error. 879 func (i *Iter) SetBool(v bool) error { 880 switch i.t { 881 case TagBoolTrue, TagBoolFalse, TagNull: 882 if v { 883 i.t = TagBoolTrue 884 i.cur = 0 885 i.tape.Tape[i.off-1] = uint64(TagBoolTrue) << JSONTAGOFFSET 886 } else { 887 i.t = TagBoolFalse 888 i.cur = 0 889 i.tape.Tape[i.off-1] = uint64(TagBoolFalse) << JSONTAGOFFSET 890 } 891 return nil 892 } 893 return fmt.Errorf("cannot set tag %s to bool", i.t.String()) 894 } 895 896 // SetNull can change the following types to null: 897 // Bool, String, (Unsigned) Integer, Float, Objects and Arrays. 898 // Attempting to change other types will return an error. 899 func (i *Iter) SetNull() error { 900 switch i.t { 901 case TagBoolTrue, TagBoolFalse, TagNull: 902 // 1 value on stream 903 i.t = TagNull 904 i.cur = 0 905 i.tape.Tape[i.off-1] = uint64(TagNull) << JSONTAGOFFSET 906 case TagString, TagFloat, TagInteger, TagUint: 907 // 2 values 908 i.tape.Tape[i.off-1] = uint64(TagNull) << JSONTAGOFFSET 909 i.tape.Tape[i.off] = uint64(TagNop)<<JSONTAGOFFSET | 1 910 i.t = TagNull 911 i.cur = 0 912 case TagObjectStart, TagArrayStart, TagRoot: 913 // Read length, skipping the object/array: 914 i.addNext = int(i.cur) - i.off 915 i.tape.Tape[i.off-1] = uint64(TagNull) << JSONTAGOFFSET 916 // Fill with nops 917 for j := i.off; j < int(i.cur); j++ { 918 i.tape.Tape[j] = uint64(TagNop)<<JSONTAGOFFSET | (i.cur - uint64(j)) 919 } 920 i.t = TagNull 921 i.cur = 0 922 default: 923 return fmt.Errorf("cannot set tag %s to null", i.t.String()) 924 } 925 return nil 926 } 927 928 // Interface returns the value as an interface. 929 // Objects are returned as map[string]interface{}. 930 // Arrays are returned as []interface{}. 931 // Float values are returned as float64. 932 // Integer values are returned as int64 or uint64. 933 // String values are returned as string. 934 // Boolean values are returned as bool. 935 // Null values are returned as nil. 936 // Root objects are returned as []interface{}. 937 func (i *Iter) Interface() (interface{}, error) { 938 switch i.t.Type() { 939 case TypeUint: 940 return i.Uint() 941 case TypeInt: 942 return i.Int() 943 case TypeFloat: 944 return i.Float() 945 case TypeNull: 946 return nil, nil 947 case TypeArray: 948 arr, err := i.Array(nil) 949 if err != nil { 950 return nil, err 951 } 952 return arr.Interface() 953 case TypeString: 954 return i.String() 955 case TypeObject: 956 obj, err := i.Object(nil) 957 if err != nil { 958 return nil, err 959 } 960 return obj.Map(nil) 961 case TypeBool: 962 return i.t == TagBoolTrue, nil 963 case TypeRoot: 964 var dst []interface{} 965 var tmp Iter 966 for { 967 typ, obj, err := i.Root(&tmp) 968 if err != nil { 969 return nil, err 970 } 971 if typ == TypeNone { 972 break 973 } 974 elem, err := obj.Interface() 975 if err != nil { 976 return nil, err 977 } 978 dst = append(dst, elem) 979 typ = i.Advance() 980 if typ != TypeRoot { 981 break 982 } 983 } 984 return dst, nil 985 case TypeNone: 986 if i.PeekNextTag() == TagEnd { 987 return nil, errors.New("no content in iterator") 988 } 989 i.Advance() 990 return i.Interface() 991 default: 992 } 993 return nil, fmt.Errorf("unknown tag type: %v", i.t) 994 } 995 996 // Object will return the next element as an object. 997 // An optional destination can be given. 998 func (i *Iter) Object(dst *Object) (*Object, error) { 999 if i.t != TagObjectStart { 1000 return nil, errors.New("next item is not object") 1001 } 1002 end := i.cur 1003 if end < uint64(i.off) { 1004 return nil, errors.New("corrupt input: object ends at index before start") 1005 } 1006 if uint64(len(i.tape.Tape)) < end { 1007 return nil, errors.New("corrupt input: object extended beyond tape") 1008 } 1009 if dst == nil { 1010 dst = &Object{} 1011 } 1012 dst.tape.Tape = i.tape.Tape[:end] 1013 dst.tape.Strings = i.tape.Strings 1014 dst.tape.Message = i.tape.Message 1015 dst.off = i.off 1016 1017 return dst, nil 1018 } 1019 1020 // Array will return the next element as an array. 1021 // An optional destination can be given. 1022 func (i *Iter) Array(dst *Array) (*Array, error) { 1023 if i.t != TagArrayStart { 1024 return nil, errors.New("next item is not object") 1025 } 1026 end := i.cur 1027 if uint64(len(i.tape.Tape)) < end { 1028 return nil, errors.New("corrupt input: object extended beyond tape") 1029 } 1030 if dst == nil { 1031 dst = &Array{} 1032 } 1033 dst.tape.Tape = i.tape.Tape[:end] 1034 dst.tape.Strings = i.tape.Strings 1035 dst.tape.Message = i.tape.Message 1036 dst.off = i.off 1037 1038 return dst, nil 1039 } 1040 1041 func (pj *ParsedJson) Reset() { 1042 pj.Tape = pj.Tape[:0] 1043 pj.Strings.B = pj.Strings.B[:0] 1044 pj.Message = pj.Message[:0] 1045 } 1046 1047 func (pj *ParsedJson) get_current_loc() uint64 { 1048 return uint64(len(pj.Tape)) 1049 } 1050 1051 func (pj *ParsedJson) write_tape(val uint64, c byte) { 1052 pj.Tape = append(pj.Tape, val|(uint64(c)<<56)) 1053 } 1054 1055 // writeTapeTagVal will write a tag with no embedded value and a value to the tape. 1056 func (pj *ParsedJson) writeTapeTagVal(tag Tag, val uint64) { 1057 pj.Tape = append(pj.Tape, uint64(tag)<<56, val) 1058 } 1059 1060 func (pj *ParsedJson) writeTapeTagValFlags(id, val uint64) { 1061 pj.Tape = append(pj.Tape, id, val) 1062 } 1063 1064 func (pj *ParsedJson) write_tape_s64(val int64) { 1065 pj.writeTapeTagVal(TagInteger, uint64(val)) 1066 } 1067 1068 func (pj *ParsedJson) write_tape_double(d float64) { 1069 pj.writeTapeTagVal(TagFloat, math.Float64bits(d)) 1070 } 1071 1072 func (pj *ParsedJson) annotate_previousloc(saved_loc uint64, val uint64) { 1073 pj.Tape[saved_loc] |= val 1074 } 1075 1076 // Tag indicates the data type of a tape entry 1077 type Tag uint8 1078 1079 const ( 1080 TagString = Tag('"') 1081 TagInteger = Tag('l') 1082 TagUint = Tag('u') 1083 TagFloat = Tag('d') 1084 TagNull = Tag('n') 1085 TagBoolTrue = Tag('t') 1086 TagBoolFalse = Tag('f') 1087 TagObjectStart = Tag('{') 1088 TagObjectEnd = Tag('}') 1089 TagArrayStart = Tag('[') 1090 TagArrayEnd = Tag(']') 1091 TagRoot = Tag('r') 1092 TagNop = Tag('N') 1093 TagEnd = Tag(0) 1094 ) 1095 1096 var tagOpenToClose = [256]Tag{ 1097 TagObjectStart: TagObjectEnd, 1098 TagArrayStart: TagArrayEnd, 1099 TagRoot: TagRoot, 1100 } 1101 1102 func (t Tag) String() string { 1103 return string([]byte{byte(t)}) 1104 } 1105 1106 // Type is a JSON value type. 1107 type Type uint8 1108 1109 const ( 1110 TypeNone Type = iota 1111 TypeNull 1112 TypeString 1113 TypeInt 1114 TypeUint 1115 TypeFloat 1116 TypeBool 1117 TypeObject 1118 TypeArray 1119 TypeRoot 1120 ) 1121 1122 // String returns the type as a string. 1123 func (t Type) String() string { 1124 switch t { 1125 case TypeNone: 1126 return "(no type)" 1127 case TypeNull: 1128 return "null" 1129 case TypeString: 1130 return "string" 1131 case TypeInt: 1132 return "int" 1133 case TypeUint: 1134 return "uint" 1135 case TypeFloat: 1136 return "float" 1137 case TypeBool: 1138 return "bool" 1139 case TypeObject: 1140 return "object" 1141 case TypeArray: 1142 return "array" 1143 case TypeRoot: 1144 return "root" 1145 } 1146 return "(invalid)" 1147 } 1148 1149 // TagToType converts a tag to type. 1150 // For arrays and objects only the start tag will return types. 1151 // All non-existing tags returns TypeNone. 1152 var TagToType = [256]Type{ 1153 TagString: TypeString, 1154 TagInteger: TypeInt, 1155 TagUint: TypeUint, 1156 TagFloat: TypeFloat, 1157 TagNull: TypeNull, 1158 TagBoolTrue: TypeBool, 1159 TagBoolFalse: TypeBool, 1160 TagObjectStart: TypeObject, 1161 TagArrayStart: TypeArray, 1162 TagRoot: TypeRoot, 1163 } 1164 1165 // Type converts a tag to a type. 1166 // Only basic types and array+object start match a type. 1167 func (t Tag) Type() Type { 1168 return TagToType[t] 1169 } 1170 1171 var shouldEscape = [256]bool{ 1172 '\b': true, 1173 '\f': true, 1174 '\n': true, 1175 '\r': true, 1176 '"': true, 1177 '\t': true, 1178 '\\': true, 1179 // Remaining will be added in init below. 1180 } 1181 1182 func init() { 1183 for i := range shouldEscape[:0x20] { 1184 shouldEscape[i] = true 1185 } 1186 } 1187 1188 // escapeBytes will escape JSON bytes. 1189 // Output is appended to dst. 1190 func escapeBytes(dst, src []byte) []byte { 1191 esc := false 1192 for i, s := range src { 1193 if shouldEscape[s] { 1194 if i > 0 { 1195 dst = append(dst, src[:i]...) 1196 src = src[i:] 1197 } 1198 esc = true 1199 break 1200 } 1201 } 1202 if !esc { 1203 // Nothing was escaped... 1204 return append(dst, src...) 1205 } 1206 for _, s := range src { 1207 if !shouldEscape[s] { 1208 dst = append(dst, s) 1209 continue 1210 } 1211 switch s { 1212 case '\b': 1213 dst = append(dst, '\\', 'b') 1214 1215 case '\f': 1216 dst = append(dst, '\\', 'f') 1217 1218 case '\n': 1219 dst = append(dst, '\\', 'n') 1220 1221 case '\r': 1222 dst = append(dst, '\\', 'r') 1223 1224 case '"': 1225 dst = append(dst, '\\', '"') 1226 1227 case '\t': 1228 dst = append(dst, '\\', 't') 1229 1230 case '\\': 1231 dst = append(dst, '\\', '\\') 1232 1233 default: 1234 dst = append(dst, '\\', 'u', '0', '0', valToHex[s>>4], valToHex[s&0xf]) 1235 } 1236 } 1237 return dst 1238 } 1239 1240 var valToHex = [16]byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'} 1241 1242 // floatToString converts a float to string similar to Go stdlib. 1243 func floatToString(f float64) (string, error) { 1244 var tmp [32]byte 1245 v, err := appendFloat(tmp[:0], f) 1246 return string(v), err 1247 } 1248 1249 // appendFloat converts a float to string similar to Go stdlib and appends it to dst. 1250 func appendFloat(dst []byte, f float64) ([]byte, error) { 1251 if math.IsInf(f, 0) || math.IsNaN(f) { 1252 return nil, errors.New("INF or NaN number found") 1253 } 1254 1255 // Convert as if by ES6 number to string conversion. 1256 // This matches most other JSON generators. 1257 // See golang.org/issue/6384 and golang.org/issue/14135. 1258 // Like fmt %g, but the exponent cutoffs are different 1259 // and exponents themselves are not padded to two digits. 1260 abs := math.Abs(f) 1261 if (abs >= 1e-6 && abs < 1e21) || abs == 0 { 1262 return appendFloatF(dst, f), nil 1263 } 1264 dst = strconv.AppendFloat(dst, f, 'e', -1, 64) 1265 // clean up e-09 to e-9 1266 n := len(dst) 1267 if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' { 1268 dst[n-2] = dst[n-1] 1269 dst = dst[:n-1] 1270 } 1271 return dst, nil 1272 }