github.com/dgraph-io/simdjson-go@v0.3.0/parsed_json.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package simdjson 18 19 import ( 20 "encoding/binary" 21 "errors" 22 "fmt" 23 "io" 24 "io/ioutil" 25 "math" 26 "strconv" 27 ) 28 29 // 30 // For enhanced performance, simdjson-go can point back into the original JSON buffer for strings, 31 // however this can lead to issues in streaming use cases scenarios, or scenarios in which 32 // the underlying JSON buffer is reused. So the default behaviour is to create copies of all 33 // strings (not just those transformed anyway for unicode escape characters) into the separate 34 // Strings buffer (at the expense of using more memory and less performance). 35 // 36 const alwaysCopyStrings = true 37 38 const JSONVALUEMASK = 0xffffffffffffff 39 const JSONTAGMASK = 0xff << 56 40 const STRINGBUFBIT = 0x80000000000000 41 const STRINGBUFMASK = 0x7fffffffffffff 42 43 const maxdepth = 128 44 45 // FloatFlags are flags recorded when converting floats. 46 type FloatFlags uint64 47 48 // FloatFlag is a flag recorded when parsing floats. 49 type FloatFlag uint64 50 51 const ( 52 // FloatOverflowedInteger is set when number in JSON was in integer notation, 53 // but under/overflowed both int64 and uint64 and therefore was parsed as float. 
54 FloatOverflowedInteger FloatFlag = 1 << iota 55 ) 56 57 // Contains returns whether f contains the specified flag. 58 func (f FloatFlags) Contains(flag FloatFlag) bool { 59 return FloatFlag(f)&flag == flag 60 } 61 62 // Flags converts the flag to FloatFlags and optionally merges more flags. 63 func (f FloatFlag) Flags(more ...FloatFlag) FloatFlags { 64 // We operate on a copy, so we can modify f. 65 for _, v := range more { 66 f |= v 67 } 68 return FloatFlags(f) 69 } 70 71 type ParsedJson struct { 72 Message []byte 73 Tape []uint64 74 Strings []byte 75 76 // allows to reuse the internal structures without exposing it. 77 internal *internalParsedJson 78 } 79 80 const indexSlots = 16 81 const indexSize = 1536 // Seems to be a good size for the index buffering 82 const indexSizeWithSafetyBuffer = indexSize - 128 // Make sure we never write beyond buffer 83 84 type indexChan struct { 85 index int 86 length int 87 indexes *[indexSize]uint32 88 } 89 90 type internalParsedJson struct { 91 ParsedJson 92 containingScopeOffset []uint64 93 isvalid bool 94 indexChans chan indexChan 95 indexesChan indexChan 96 buffers [indexSlots][indexSize]uint32 97 buffersOffset uint64 98 ndjson uint64 99 } 100 101 // Iter returns a new Iter. 102 func (pj *ParsedJson) Iter() Iter { 103 return Iter{tape: *pj} 104 } 105 106 // stringAt returns a string at a specific offset in the stringbuffer. 107 func (pj *ParsedJson) stringAt(offset, length uint64) (string, error) { 108 b, err := pj.stringByteAt(offset, length) 109 return string(b), err 110 } 111 112 // stringByteAt returns a string at a specific offset in the stringbuffer. 
113 func (pj *ParsedJson) stringByteAt(offset, length uint64) ([]byte, error) { 114 if offset&STRINGBUFBIT == 0 { 115 if offset+length > uint64(len(pj.Message)) { 116 return nil, fmt.Errorf("string message offset (%v) outside valid area (%v)", offset+length, len(pj.Message)) 117 } 118 return pj.Message[offset : offset+length], nil 119 } 120 121 offset = offset & STRINGBUFMASK 122 if offset+length > uint64(len(pj.Strings)) { 123 return nil, fmt.Errorf("string buffer offset (%v) outside valid area (%v)", offset+length, len(pj.Strings)) 124 } 125 return pj.Strings[offset : offset+length], nil 126 } 127 128 // Iter represents a section of JSON. 129 // To start iterating it, use Advance() or AdvanceIter() methods 130 // which will queue the first element. 131 // If an Iter is copied, the copy will be independent. 132 type Iter struct { 133 // The tape where this iter start. 134 tape ParsedJson 135 136 // offset of the next entry to be decoded 137 off int 138 139 // addNext is the number of entries to skip for the next entry. 140 addNext int 141 142 // current value, exclude tag in top bits 143 cur uint64 144 145 // current tag 146 t Tag 147 } 148 149 // loadTape will load the input from the supplied readers. 150 func loadTape(tape, strings io.Reader) (*ParsedJson, error) { 151 b, err := ioutil.ReadAll(tape) 152 if err != nil { 153 return nil, err 154 } 155 if len(b)&7 != 0 { 156 return nil, errors.New("unexpected tape length, should be modulo 8 bytes") 157 } 158 dst := ParsedJson{ 159 Tape: make([]uint64, len(b)/8), 160 Strings: nil, 161 } 162 // Read tape 163 for i := range dst.Tape { 164 dst.Tape[i] = binary.LittleEndian.Uint64(b[i*8 : i*8+8]) 165 } 166 // Read stringbuf 167 b, err = ioutil.ReadAll(strings) 168 if err != nil { 169 return nil, err 170 } 171 dst.Strings = b 172 return &dst, nil 173 } 174 175 // Advance will read the type of the next element 176 // and queues up the value on the same level. 
177 func (i *Iter) Advance() Type { 178 i.off += i.addNext 179 if i.off >= len(i.tape.Tape) { 180 i.addNext = 0 181 i.t = TagEnd 182 return TypeNone 183 } 184 185 v := i.tape.Tape[i.off] 186 i.cur = v & JSONVALUEMASK 187 i.t = Tag(v >> 56) 188 i.off++ 189 i.calcNext(false) 190 if i.addNext < 0 { 191 // We can't send error, so move to end. 192 i.moveToEnd() 193 return TypeNone 194 } 195 return TagToType[i.t] 196 } 197 198 // AdvanceInto will read the tag of the next element 199 // and move into and out of arrays , objects and root elements. 200 // This should only be used for strictly manual parsing. 201 func (i *Iter) AdvanceInto() Tag { 202 i.off += i.addNext 203 if i.off >= len(i.tape.Tape) { 204 i.addNext = 0 205 i.t = TagEnd 206 return TagEnd 207 } 208 209 v := i.tape.Tape[i.off] 210 i.cur = v & JSONVALUEMASK 211 i.t = Tag(v >> 56) 212 i.off++ 213 i.calcNext(true) 214 if i.addNext < 0 { 215 // We can't send error, so end tape. 216 i.moveToEnd() 217 return TagEnd 218 } 219 return i.t 220 } 221 222 func (i *Iter) moveToEnd() { 223 i.off = len(i.tape.Tape) 224 i.addNext = 0 225 i.t = TagEnd 226 } 227 228 // calcNext will populate addNext to the correct value to skip. 229 // Specify whether to move into objects/array. 230 func (i *Iter) calcNext(into bool) { 231 i.addNext = 0 232 switch i.t { 233 case TagInteger, TagUint, TagFloat, TagString: 234 i.addNext = 1 235 case TagRoot, TagObjectStart, TagArrayStart: 236 if !into { 237 i.addNext = int(i.cur) - i.off 238 } 239 } 240 } 241 242 // Type returns the queued value type from the previous call to Advance. 243 func (i *Iter) Type() Type { 244 if i.off+i.addNext > len(i.tape.Tape) { 245 return TypeNone 246 } 247 return TagToType[i.t] 248 } 249 250 // AdvanceIter will read the type of the next element 251 // and return an iterator only containing the object. 252 // If dst and i are the same, both will contain the value inside. 
253 func (i *Iter) AdvanceIter(dst *Iter) (Type, error) { 254 i.off += i.addNext 255 if i.off == len(i.tape.Tape) { 256 i.addNext = 0 257 i.t = TagEnd 258 return TypeNone, nil 259 } 260 if i.off > len(i.tape.Tape) { 261 return TypeNone, errors.New("offset bigger than tape") 262 } 263 264 // Get current value off tape. 265 v := i.tape.Tape[i.off] 266 i.cur = v & JSONVALUEMASK 267 i.t = Tag(v >> 56) 268 i.off++ 269 i.calcNext(false) 270 if i.addNext < 0 { 271 i.moveToEnd() 272 return TypeNone, errors.New("element has negative offset") 273 } 274 275 // Calculate end of this object. 276 iEnd := i.off + i.addNext 277 typ := TagToType[i.t] 278 279 // Copy i if different 280 if i != dst { 281 *dst = *i 282 } 283 // Move into dst 284 dst.calcNext(true) 285 if dst.addNext < 0 { 286 i.moveToEnd() 287 return TypeNone, errors.New("element has negative offset") 288 } 289 290 if iEnd > len(dst.tape.Tape) { 291 return TypeNone, errors.New("element extends beyond tape") 292 } 293 294 // Restrict destination. 295 dst.tape.Tape = dst.tape.Tape[:iEnd] 296 297 return typ, nil 298 } 299 300 // PeekNext will return the next value type. 301 // Returns TypeNone if next ends iterator. 302 func (i *Iter) PeekNext() Type { 303 if i.off+i.addNext >= len(i.tape.Tape) { 304 return TypeNone 305 } 306 return TagToType[Tag(i.tape.Tape[i.off+i.addNext]>>56)] 307 } 308 309 // PeekNextTag will return the tag at the current offset. 310 // Will return TagEnd if at end of iterator. 311 func (i *Iter) PeekNextTag() Tag { 312 if i.off+i.addNext >= len(i.tape.Tape) { 313 return TagEnd 314 } 315 return Tag(i.tape.Tape[i.off+i.addNext] >> 56) 316 } 317 318 // MarshalJSON will marshal the entire remaining scope of the iterator. 319 func (i *Iter) MarshalJSON() ([]byte, error) { 320 return i.MarshalJSONBuffer(nil) 321 } 322 323 // MarshalJSONBuffer will marshal the remaining scope of the iterator including the current value. 324 // An optional buffer can be provided for fewer allocations. 
325 // Output will be appended to the destination. 326 func (i *Iter) MarshalJSONBuffer(dst []byte) ([]byte, error) { 327 var tmpBuf []byte 328 329 // Pre-allocate for 100 deep. 330 var stackTmp [100]uint8 331 // We have a stackNone on top of the stack 332 stack := stackTmp[:1] 333 const ( 334 stackNone = iota 335 stackArray 336 stackObject 337 stackRoot 338 ) 339 340 writeloop: 341 for { 342 // Write key names. 343 if stack[len(stack)-1] == stackObject && i.t != TagObjectEnd { 344 sb, err := i.StringBytes() 345 if err != nil { 346 return nil, fmt.Errorf("expected key within object: %w", err) 347 } 348 dst = append(dst, '"') 349 dst = escapeBytes(dst, sb) 350 dst = append(dst, '"', ':') 351 if i.PeekNextTag() == TagEnd { 352 return nil, fmt.Errorf("unexpected end of tape within object") 353 } 354 i.AdvanceInto() 355 } 356 //fmt.Println(i.t, len(stack)-1, i.off) 357 tagswitch: 358 switch i.t { 359 case TagRoot: 360 isOpenRoot := int(i.cur) > i.off 361 if len(stack) > 1 { 362 if isOpenRoot { 363 return dst, errors.New("root tag open, but not at top of stack") 364 } 365 l := stack[len(stack)-1] 366 switch l { 367 case stackRoot: 368 if i.PeekNextTag() != TagEnd { 369 dst = append(dst, '\n') 370 } 371 stack = stack[:len(stack)-1] 372 break tagswitch 373 case stackNone: 374 break writeloop 375 default: 376 return dst, errors.New("root tag, but not at top of stack, got id " + strconv.Itoa(int(l))) 377 } 378 } 379 380 if isOpenRoot { 381 // Always move into root. 
382 i.addNext = 0 383 } 384 i.AdvanceInto() 385 stack = append(stack, stackRoot) 386 continue 387 case TagString: 388 sb, err := i.StringBytes() 389 if err != nil { 390 return nil, err 391 } 392 dst = append(dst, '"') 393 dst = escapeBytes(dst, sb) 394 dst = append(dst, '"') 395 tmpBuf = tmpBuf[:0] 396 case TagInteger: 397 v, err := i.Int() 398 if err != nil { 399 return nil, err 400 } 401 dst = strconv.AppendInt(dst, v, 10) 402 case TagUint: 403 v, err := i.Uint() 404 if err != nil { 405 return nil, err 406 } 407 dst = strconv.AppendUint(dst, v, 10) 408 case TagFloat: 409 v, err := i.Float() 410 if err != nil { 411 return nil, err 412 } 413 dst, err = appendFloat(dst, v) 414 if err != nil { 415 return nil, err 416 } 417 case TagNull: 418 dst = append(dst, []byte("null")...) 419 case TagBoolTrue: 420 dst = append(dst, []byte("true")...) 421 case TagBoolFalse: 422 dst = append(dst, []byte("false")...) 423 case TagObjectStart: 424 dst = append(dst, '{') 425 stack = append(stack, stackObject) 426 // We should not emit commas. 427 i.AdvanceInto() 428 continue 429 case TagObjectEnd: 430 dst = append(dst, '}') 431 if stack[len(stack)-1] != stackObject { 432 return dst, errors.New("end of object with no object on stack") 433 } 434 stack = stack[:len(stack)-1] 435 case TagArrayStart: 436 dst = append(dst, '[') 437 stack = append(stack, stackArray) 438 i.AdvanceInto() 439 continue 440 case TagArrayEnd: 441 dst = append(dst, ']') 442 if stack[len(stack)-1] != stackArray { 443 return nil, errors.New("end of array with no array on stack") 444 } 445 stack = stack[:len(stack)-1] 446 case TagEnd: 447 if i.PeekNextTag() == TagEnd { 448 return nil, errors.New("no content queued in iterator") 449 } 450 i.AdvanceInto() 451 continue 452 } 453 454 if i.PeekNextTag() == TagEnd { 455 break 456 } 457 i.AdvanceInto() 458 459 // Output object separators, etc. 
460 switch stack[len(stack)-1] { 461 case stackArray: 462 switch i.t { 463 case TagArrayEnd: 464 default: 465 dst = append(dst, ',') 466 } 467 case stackObject: 468 switch i.t { 469 case TagObjectEnd: 470 default: 471 dst = append(dst, ',') 472 } 473 } 474 } 475 if len(stack) > 1 { 476 return nil, fmt.Errorf("objects or arrays not closed. left on stack: %v", stack[1:]) 477 } 478 return dst, nil 479 } 480 481 // Float returns the float value of the next element. 482 // Integers are automatically converted to float. 483 func (i *Iter) Float() (float64, error) { 484 switch i.t { 485 case TagFloat: 486 if i.off >= len(i.tape.Tape) { 487 return 0, errors.New("corrupt input: expected float, but no more values on tape") 488 } 489 v := math.Float64frombits(i.tape.Tape[i.off]) 490 return v, nil 491 case TagInteger: 492 if i.off >= len(i.tape.Tape) { 493 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 494 } 495 v := int64(i.tape.Tape[i.off]) 496 return float64(v), nil 497 case TagUint: 498 if i.off >= len(i.tape.Tape) { 499 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 500 } 501 v := i.tape.Tape[i.off] 502 return float64(v), nil 503 default: 504 return 0, fmt.Errorf("unable to convert type %v to float", i.t) 505 } 506 } 507 508 // FloatFlags returns the float value of the next element. 509 // This will include flags from parsing. 510 // Integers are automatically converted to float. 
511 func (i *Iter) FloatFlags() (float64, FloatFlags, error) { 512 switch i.t { 513 case TagFloat: 514 if i.off >= len(i.tape.Tape) { 515 return 0, 0, errors.New("corrupt input: expected float, but no more values on tape") 516 } 517 v := math.Float64frombits(i.tape.Tape[i.off]) 518 return v, 0, nil 519 case TagInteger: 520 if i.off >= len(i.tape.Tape) { 521 return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape") 522 } 523 v := int64(i.tape.Tape[i.off]) 524 return float64(v), 0, nil 525 case TagUint: 526 if i.off >= len(i.tape.Tape) { 527 return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape") 528 } 529 v := i.tape.Tape[i.off] 530 return float64(v), FloatFlags(i.cur), nil 531 default: 532 return 0, 0, fmt.Errorf("unable to convert type %v to float", i.t) 533 } 534 } 535 536 // Int returns the integer value of the next element. 537 // Integers and floats within range are automatically converted. 538 func (i *Iter) Int() (int64, error) { 539 switch i.t { 540 case TagFloat: 541 if i.off >= len(i.tape.Tape) { 542 return 0, errors.New("corrupt input: expected float, but no more values on tape") 543 } 544 v := math.Float64frombits(i.tape.Tape[i.off]) 545 if v > math.MaxInt64 { 546 return 0, errors.New("float value overflows int64") 547 } 548 if v < math.MinInt64 { 549 return 0, errors.New("float value underflows int64") 550 } 551 return int64(v), nil 552 case TagInteger: 553 if i.off >= len(i.tape.Tape) { 554 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 555 } 556 v := int64(i.tape.Tape[i.off]) 557 return v, nil 558 case TagUint: 559 if i.off >= len(i.tape.Tape) { 560 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 561 } 562 v := i.tape.Tape[i.off] 563 if v > math.MaxInt64 { 564 return 0, errors.New("unsigned integer value overflows int64") 565 } 566 return int64(v), nil 567 default: 568 return 0, fmt.Errorf("unable to convert type %v to 
float", i.t) 569 } 570 } 571 572 // Uint returns the unsigned integer value of the next element. 573 // Positive integers and floats within range are automatically converted. 574 func (i *Iter) Uint() (uint64, error) { 575 switch i.t { 576 case TagFloat: 577 if i.off >= len(i.tape.Tape) { 578 return 0, errors.New("corrupt input: expected float, but no more values on tape") 579 } 580 v := math.Float64frombits(i.tape.Tape[i.off]) 581 if v > math.MaxUint64 { 582 return 0, errors.New("float value overflows uint64") 583 } 584 if v < 0 { 585 return 0, errors.New("float value is negative. cannot convert to uint") 586 } 587 return uint64(v), nil 588 case TagInteger: 589 if i.off >= len(i.tape.Tape) { 590 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 591 } 592 v := int64(i.tape.Tape[i.off]) 593 if v < 0 { 594 return 0, errors.New("integer value is negative. cannot convert to uint") 595 } 596 597 return uint64(v), nil 598 case TagUint: 599 if i.off >= len(i.tape.Tape) { 600 return 0, errors.New("corrupt input: expected integer, but no more values on tape") 601 } 602 v := i.tape.Tape[i.off] 603 return v, nil 604 default: 605 return 0, fmt.Errorf("unable to convert type %v to float", i.t) 606 } 607 } 608 609 // String() returns a string value. 610 func (i *Iter) String() (string, error) { 611 if i.t != TagString { 612 return "", errors.New("value is not string") 613 } 614 if i.off >= len(i.tape.Tape) { 615 return "", errors.New("corrupt input: no string offset") 616 } 617 618 return i.tape.stringAt(i.cur, i.tape.Tape[i.off]) 619 } 620 621 // StringBytes() returns a byte array. 
622 func (i *Iter) StringBytes() ([]byte, error) { 623 if i.t != TagString { 624 return nil, errors.New("value is not string") 625 } 626 if i.off >= len(i.tape.Tape) { 627 return nil, errors.New("corrupt input: no string offset on tape") 628 } 629 return i.tape.stringByteAt(i.cur, i.tape.Tape[i.off]) 630 } 631 632 // StringCvt() returns a string representation of the value. 633 // Root, Object and Arrays are not supported. 634 func (i *Iter) StringCvt() (string, error) { 635 switch i.t { 636 case TagString: 637 return i.String() 638 case TagInteger: 639 v, err := i.Int() 640 return strconv.FormatInt(v, 10), err 641 case TagUint: 642 v, err := i.Uint() 643 return strconv.FormatUint(v, 10), err 644 case TagFloat: 645 v, err := i.Float() 646 if err != nil { 647 return "", err 648 } 649 return floatToString(v) 650 case TagBoolFalse: 651 return "false", nil 652 case TagBoolTrue: 653 return "true", nil 654 case TagNull: 655 return "null", nil 656 } 657 return "", fmt.Errorf("cannot convert type %s to string", TagToType[i.t]) 658 } 659 660 // Root() returns the object embedded in root as an iterator 661 // along with the type of the content of the first element of the iterator. 662 // An optional destination can be supplied to avoid allocations. 663 func (i *Iter) Root(dst *Iter) (Type, *Iter, error) { 664 if i.t != TagRoot { 665 return TypeNone, dst, errors.New("value is not root") 666 } 667 if i.cur > uint64(len(i.tape.Tape)) { 668 return TypeNone, dst, errors.New("root element extends beyond tape") 669 } 670 if dst == nil { 671 c := *i 672 dst = &c 673 } else { 674 dst.cur = i.cur 675 dst.off = i.off 676 dst.t = i.t 677 dst.tape.Strings = i.tape.Strings 678 dst.tape.Message = i.tape.Message 679 } 680 dst.addNext = 0 681 dst.tape.Tape = i.tape.Tape[:i.cur-1] 682 return dst.AdvanceInto().Type(), dst, nil 683 } 684 685 // Bool() returns the bool value. 
686 func (i *Iter) Bool() (bool, error) { 687 switch i.t { 688 case TagBoolTrue: 689 return true, nil 690 case TagBoolFalse: 691 return false, nil 692 } 693 return false, fmt.Errorf("value is not bool, but %v", i.t) 694 } 695 696 // Interface returns the value as an interface. 697 // Objects are returned as map[string]interface{}. 698 // Arrays are returned as []interface{}. 699 // Float values are returned as float64. 700 // Integer values are returned as int64 or uint64. 701 // String values are returned as string. 702 // Boolean values are returned as bool. 703 // Null values are returned as nil. 704 // Root objects are returned as []interface{}. 705 func (i *Iter) Interface() (interface{}, error) { 706 switch i.t.Type() { 707 case TypeUint: 708 return i.Uint() 709 case TypeInt: 710 return i.Int() 711 case TypeFloat: 712 return i.Float() 713 case TypeNull: 714 return nil, nil 715 case TypeArray: 716 arr, err := i.Array(nil) 717 if err != nil { 718 return nil, err 719 } 720 return arr.Interface() 721 case TypeString: 722 return i.String() 723 case TypeObject: 724 obj, err := i.Object(nil) 725 if err != nil { 726 return nil, err 727 } 728 return obj.Map(nil) 729 case TypeBool: 730 return i.t == TagBoolTrue, nil 731 case TypeRoot: 732 var dst []interface{} 733 var tmp Iter 734 for { 735 typ, obj, err := i.Root(&tmp) 736 if err != nil { 737 return nil, err 738 } 739 if typ == TypeNone { 740 break 741 } 742 elem, err := obj.Interface() 743 if err != nil { 744 return nil, err 745 } 746 dst = append(dst, elem) 747 typ = i.Advance() 748 if typ != TypeRoot { 749 break 750 } 751 } 752 return dst, nil 753 case TypeNone: 754 if i.PeekNextTag() == TagEnd { 755 return nil, errors.New("no content in iterator") 756 } 757 i.Advance() 758 return i.Interface() 759 default: 760 } 761 return nil, fmt.Errorf("unknown tag type: %v", i.t) 762 } 763 764 // Object will return the next element as an object. 765 // An optional destination can be given. 
766 func (i *Iter) Object(dst *Object) (*Object, error) { 767 if i.t != TagObjectStart { 768 return nil, errors.New("next item is not object") 769 } 770 end := i.cur 771 if end < uint64(i.off) { 772 return nil, errors.New("corrupt input: object ends at index before start") 773 } 774 if uint64(len(i.tape.Tape)) < end { 775 return nil, errors.New("corrupt input: object extended beyond tape") 776 } 777 if dst == nil { 778 dst = &Object{} 779 } 780 dst.tape.Tape = i.tape.Tape[:end] 781 dst.tape.Strings = i.tape.Strings 782 dst.tape.Message = i.tape.Message 783 dst.off = i.off 784 785 return dst, nil 786 } 787 788 // Array will return the next element as an array. 789 // An optional destination can be given. 790 func (i *Iter) Array(dst *Array) (*Array, error) { 791 if i.t != TagArrayStart { 792 return nil, errors.New("next item is not object") 793 } 794 end := i.cur 795 if uint64(len(i.tape.Tape)) < end { 796 return nil, errors.New("corrupt input: object extended beyond tape") 797 } 798 if dst == nil { 799 dst = &Array{} 800 } 801 dst.tape.Tape = i.tape.Tape[:end] 802 dst.tape.Strings = i.tape.Strings 803 dst.tape.Message = i.tape.Message 804 dst.off = i.off 805 806 return dst, nil 807 } 808 809 func (pj *ParsedJson) Reset() { 810 pj.Tape = pj.Tape[:0] 811 pj.Strings = pj.Strings[:0] 812 pj.Message = pj.Message[:0] 813 } 814 815 func (pj *ParsedJson) get_current_loc() uint64 { 816 return uint64(len(pj.Tape)) 817 } 818 819 func (pj *ParsedJson) write_tape(val uint64, c byte) { 820 pj.Tape = append(pj.Tape, val|(uint64(c)<<56)) 821 } 822 823 // writeTapeTagVal will write a tag with no embedded value and a value to the tape. 
824 func (pj *ParsedJson) writeTapeTagVal(tag Tag, val uint64) { 825 pj.Tape = append(pj.Tape, uint64(tag)<<56, val) 826 } 827 828 func (pj *ParsedJson) writeTapeTagValFlags(tag Tag, val, flags uint64) { 829 pj.Tape = append(pj.Tape, uint64(tag)<<56|flags, val) 830 } 831 832 func (pj *ParsedJson) write_tape_s64(val int64) { 833 pj.writeTapeTagVal(TagInteger, uint64(val)) 834 } 835 836 func (pj *ParsedJson) write_tape_double(d float64) { 837 pj.writeTapeTagVal(TagFloat, math.Float64bits(d)) 838 } 839 840 func (pj *ParsedJson) annotate_previousloc(saved_loc uint64, val uint64) { 841 pj.Tape[saved_loc] |= val 842 } 843 844 // Tag indicates the data type of a tape entry 845 type Tag uint8 846 847 const ( 848 TagString = Tag('"') 849 TagInteger = Tag('l') 850 TagUint = Tag('u') 851 TagFloat = Tag('d') 852 TagNull = Tag('n') 853 TagBoolTrue = Tag('t') 854 TagBoolFalse = Tag('f') 855 TagObjectStart = Tag('{') 856 TagObjectEnd = Tag('}') 857 TagArrayStart = Tag('[') 858 TagArrayEnd = Tag(']') 859 TagRoot = Tag('r') 860 TagEnd = Tag(0) 861 ) 862 863 var tagOpenToClose = [256]Tag{ 864 TagObjectStart: TagObjectEnd, 865 TagArrayStart: TagArrayEnd, 866 TagRoot: TagRoot, 867 } 868 869 func (t Tag) String() string { 870 return string([]byte{byte(t)}) 871 } 872 873 // Type is a JSON value type. 874 type Type uint8 875 876 const ( 877 TypeNone Type = iota 878 TypeNull 879 TypeString 880 TypeInt 881 TypeUint 882 TypeFloat 883 TypeBool 884 TypeObject 885 TypeArray 886 TypeRoot 887 ) 888 889 // String returns the type as a string. 
890 func (t Type) String() string { 891 switch t { 892 case TypeNone: 893 return "(no type)" 894 case TypeNull: 895 return "null" 896 case TypeString: 897 return "string" 898 case TypeInt: 899 return "int" 900 case TypeUint: 901 return "uint" 902 case TypeFloat: 903 return "float" 904 case TypeBool: 905 return "bool" 906 case TypeObject: 907 return "object" 908 case TypeArray: 909 return "array" 910 case TypeRoot: 911 return "root" 912 } 913 return "(invalid)" 914 } 915 916 // TagToType converts a tag to type. 917 // For arrays and objects only the start tag will return types. 918 // All non-existing tags returns TypeNone. 919 var TagToType = [256]Type{ 920 TagString: TypeString, 921 TagInteger: TypeInt, 922 TagUint: TypeUint, 923 TagFloat: TypeFloat, 924 TagNull: TypeNull, 925 TagBoolTrue: TypeBool, 926 TagBoolFalse: TypeBool, 927 TagObjectStart: TypeObject, 928 TagArrayStart: TypeArray, 929 TagRoot: TypeRoot, 930 } 931 932 // Type converts a tag to a type. 933 // Only basic types and array+object start match a type. 
934 func (t Tag) Type() Type { 935 return TagToType[t] 936 } 937 938 func (pj *internalParsedJson) dump_raw_tape() bool { 939 940 if !pj.isvalid { 941 return false 942 } 943 944 for tapeidx := uint64(0); tapeidx < uint64(len(pj.Tape)); tapeidx++ { 945 howmany := uint64(0) 946 tape_val := pj.Tape[tapeidx] 947 ntype := byte(tape_val >> 56) 948 fmt.Printf("%d : %c", tapeidx, ntype) 949 950 if ntype == 'r' { 951 howmany = tape_val & JSONVALUEMASK 952 } else { 953 fmt.Errorf("Error: no starting root node?\n") 954 return false 955 } 956 fmt.Printf("\t// pointing to %d (right after last node)\n", howmany) 957 958 // Decrement howmany (since we're adding one now for the ndjson support) 959 howmany -= 1 960 961 tapeidx++ 962 for ; tapeidx < howmany; tapeidx++ { 963 tape_val = pj.Tape[tapeidx] 964 fmt.Printf("%d : ", tapeidx) 965 ntype := Tag(tape_val >> 56) 966 payload := tape_val & JSONVALUEMASK 967 switch ntype { 968 case TagString: // we have a string 969 if tapeidx+1 >= howmany { 970 return false 971 } 972 fmt.Printf("string \"") 973 tapeidx++ 974 string_length := pj.Tape[tapeidx] 975 str, err := pj.stringAt(payload, string_length) 976 if err != nil { 977 fmt.Printf("string err:%v\n", err) 978 return false 979 } 980 fmt.Printf("%s (o:%d, l:%d)", print_with_escapes([]byte(str)), payload, string_length) 981 fmt.Println("\"") 982 983 case TagInteger: // we have a long int 984 if tapeidx+1 >= howmany { 985 return false 986 } 987 tapeidx++ 988 fmt.Printf("integer %d\n", int64(pj.Tape[tapeidx])) 989 990 case TagFloat: // we have a double 991 if tapeidx+1 >= howmany { 992 return false 993 } 994 tapeidx++ 995 fmt.Printf("float %f\n", math.Float64frombits(pj.Tape[tapeidx])) 996 997 case TagNull: // we have a null 998 fmt.Printf("null\n") 999 1000 case TagBoolTrue: // we have a true 1001 fmt.Printf("true\n") 1002 1003 case TagBoolFalse: // we have a false 1004 fmt.Printf("false\n") 1005 1006 case TagObjectStart: // we have an object 1007 fmt.Printf("{\t// pointing to next Tape 
location %d (first node after the scope) \n", payload) 1008 1009 case TagObjectEnd: // we end an object 1010 fmt.Printf("}\t// pointing to previous Tape location %d (start of the scope) \n", payload) 1011 1012 case TagArrayStart: // we start an array 1013 fmt.Printf("\t// pointing to next Tape location %d (first node after the scope) \n", payload) 1014 1015 case TagArrayEnd: // we end an array 1016 fmt.Printf("]\t// pointing to previous Tape location %d (start of the scope) \n", payload) 1017 1018 case TagRoot: // we start and end with the root node 1019 fmt.Printf("end of root\n") 1020 return false 1021 1022 default: 1023 return false 1024 } 1025 } 1026 1027 tape_val = pj.Tape[tapeidx] 1028 payload := tape_val & JSONVALUEMASK 1029 ntype = byte(tape_val >> 56) 1030 fmt.Printf("%d : %c\t// pointing to %d (start root)\n", tapeidx, ntype, payload) 1031 } 1032 1033 return true 1034 } 1035 1036 func print_with_escapes(src []byte) string { 1037 return string(escapeBytes(make([]byte, 0, len(src)+len(src)>>4), src)) 1038 } 1039 1040 // escapeBytes will escape JSON bytes. 1041 // Output is appended to dst. 1042 func escapeBytes(dst, src []byte) []byte { 1043 for _, s := range src { 1044 switch s { 1045 case '\b': 1046 dst = append(dst, '\\', 'b') 1047 1048 case '\f': 1049 dst = append(dst, '\\', 'f') 1050 1051 case '\n': 1052 dst = append(dst, '\\', 'n') 1053 1054 case '\r': 1055 dst = append(dst, '\\', 'r') 1056 1057 case '"': 1058 dst = append(dst, '\\', '"') 1059 1060 case '\t': 1061 dst = append(dst, '\\', 't') 1062 1063 case '\\': 1064 dst = append(dst, '\\', '\\') 1065 1066 default: 1067 if s <= 0x1f { 1068 dst = append(dst, '\\', 'u', '0', '0', valToHex[s>>4], valToHex[s&0xf]) 1069 } else { 1070 dst = append(dst, s) 1071 } 1072 } 1073 } 1074 1075 return dst 1076 } 1077 1078 var valToHex = [16]byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'} 1079 1080 // floatToString converts a float to string similar to Go stdlib. 
func floatToString(f float64) (string, error) {
	// Scratch space handed to appendFloat as the initial destination.
	var tmp [32]byte
	v, err := appendFloat(tmp[:0], f)
	return string(v), err
}

// appendFloat converts a float to string similar to Go stdlib and appends it to dst.
// Infinities and NaN cannot be represented and yield an error with a nil slice.
func appendFloat(dst []byte, f float64) ([]byte, error) {
	if math.IsInf(f, 0) || math.IsNaN(f) {
		return nil, errors.New("INF or NaN number found")
	}

	// Convert as if by ES6 number to string conversion.
	// This matches most other JSON generators.
	// See golang.org/issue/6384 and golang.org/issue/14135.
	// Like fmt %g, but the exponent cutoffs are different
	// and exponents themselves are not padded to two digits.
	abs := math.Abs(f)
	format := byte('f')
	if abs != 0 && (abs < 1e-6 || abs >= 1e21) {
		format = 'e'
	}
	dst = strconv.AppendFloat(dst, f, format, -1, 64)
	if format == 'e' {
		// clean up e-09 to e-9
		n := len(dst)
		if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
			dst[n-2] = dst[n-1]
			dst = dst[:n-1]
		}
	}
	return dst, nil
}