github.com/urso/go-structform@v0.0.2/ubjson/parse.go (about) 1 package ubjson 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "io" 7 "math" 8 9 structform "github.com/urso/go-structform" 10 ) 11 12 type Parser struct { 13 visitor structform.Visitor 14 strVisitor structform.StringRefVisitor 15 16 // last fail state 17 err error 18 19 // parser state machine 20 state stateStack 21 valueState stateStack 22 23 length lengthStack 24 25 buffer []byte 26 buffer0 [64]byte 27 28 // internal parser state 29 marker byte 30 valueType structform.BaseType 31 } 32 33 //go:generate stringer -type=stateType 34 type stateType uint8 35 36 //go:generate stringer -type=stateStep 37 type stateStep uint8 38 39 type state struct { 40 stateType 41 stateStep 42 } 43 44 const ( 45 stFail stateType = iota 46 stNext 47 stFixed // values of fixed size 48 stHighPrec // high precision number 49 stString // string 50 stArray // array 51 stArrayDyn // dynamic array 52 stArrayCount // array with element count 53 stArrayTyped // typed array with element count 54 stObject // object 55 stObjectDyn // dynamic object 56 stObjectCount // object with known # of fields 57 stObjectTyped // object with all values of same type 58 ) 59 60 const ( 61 stStart stateStep = iota 62 63 // stValue sub-states 64 stNil 65 stNoop 66 stTrue 67 stFalse 68 stInt8 69 stUInt8 70 stInt16 71 stInt32 72 stInt64 73 stFloat32 74 stFloat64 75 stChar 76 77 // variable size primitive value types 78 stWithLen 79 80 // array/object states 81 stWithType0 82 stWithType1 83 stCont 84 stFieldName 85 stFieldNameLen 86 ) 87 88 var ( 89 errUnknownMarker = errors.New("unknown ubjson marker") 90 errIncomplete = errors.New("Incomplete UBJSON input") 91 errNegativeLen = errors.New("negative length encountered") 92 errInvalidState = errors.New("invalid state") 93 errMissingArrEnd = errors.New("missing ']'") 94 errMissingObjEnd = errors.New("missing '}'") 95 errMissingCount = errors.New("missing count marker") 96 ) 97 98 func ParseReader(in io.Reader, vs structform.Visitor) (int64, error) { 99 return NewParser(vs).ParseReader(in) 100 } 101 102 func Parse(b []byte, vs structform.Visitor) error { 103 return NewParser(vs).Parse(b) 104 } 105 106 func ParseString(str string, vs structform.Visitor) error { 107 return NewParser(vs).ParseString(str) 108 } 109 110 func NewParser(vs structform.Visitor) *Parser { 111 p := &Parser{} 112 p.init(vs) 113 return p 114 } 115 116 func (p *Parser) init(vs structform.Visitor) { 117 *p = Parser{ 118 visitor: vs, 119 strVisitor: structform.MakeStringRefVisitor(vs), 120 } 121 p.buffer = p.buffer0[:0] 122 p.length.stack = p.length.stack0[:0] 123 p.state.current = state{stNext, stStart} 124 p.state.stack = p.state.stack0[:0] 125 p.valueState.stack = p.valueState.stack0[:0] 126 } 127 128 func (p *Parser) Parse(b []byte) error { 129 p.err = p.feed(b) 130 if p.err == nil { 131 p.err = p.finalize() 132 } 133 return p.err 134 } 135 136 func (p *Parser) ParseReader(in io.Reader) (int64, error) { 137 n, err := io.Copy(p, in) 138 if err == nil { 139 err = p.finalize() 140 } 141 return n, err 142 } 143 144 func (p *Parser) ParseString(s string) error { 145 return p.Parse(str2Bytes(s)) 146 } 147 148 func (p *Parser) finalize() error { 149 for len(p.state.stack) > 0 { 150 var err error 151 152 switch p.state.current.stateType { 153 case stArrayCount, stArrayTyped: 154 if p.length.current != 0 || p.state.current.stateStep != stCont { 155 return errMissingArrEnd 156 } 157 158 err = p.visitor.OnArrayFinished() 159 case stObjectCount, stObjectTyped: 160 step := p.state.current.stateStep 161 l := p.length.current 162 if l != 0 || step != stFieldName { 163 return errMissingObjEnd 164 } 165 err = p.visitor.OnObjectFinished() 166 } 167 168 if err != nil { 169 return err 170 } 171 _, err = p.popState() 172 } 173 174 st := &p.state.current 175 incomplete := len(p.state.stack) > 0 || 176 st.stateStep != stStart || 177 st.stateType != stNext 178 179 if incomplete { 180 return errIncomplete 181 } 182 return nil 183 } 184 185 func (p *Parser) Write(b []byte) (int, error) { 186 p.err = p.feed(b) 187 if p.err != nil { 188 p.state.current = state{stFail, stStart} 189 return 0, p.err 190 } 191 return len(b), nil 192 } 193 194 func (p *Parser) feed(b []byte) error { 195 for len(b) > 0 { 196 var err error 197 n, _, err := p.feedUntil(b) 198 if err != nil { 199 return err 200 } 201 202 b = b[n:] 203 } 204 205 return nil 206 } 207 208 func (p *Parser) feedUntil(b []byte) (int, bool, error) { 209 var ( 210 orig = b 211 done bool 212 err error 213 ) 214 215 for { 216 b, done, err = p.execStep(b) 217 if done || err != nil { 218 break 219 } 220 221 if len(b) == 0 { 222 break 223 } 224 } 225 return len(orig) - len(b), done, err 226 } 227 228 func (p *Parser) execStep(b []byte) ([]byte, bool, error) { 229 var ( 230 err error 231 done bool 232 ) 233 234 switch p.state.current.stateType { 235 case stFail: 236 return b, false, p.err 237 case stNext: 238 b, done, err = p.stepValue(b) 239 case stFixed: 240 b, done, err = p.stepFixedValue(b) 241 case stHighPrec: 242 b, done, err = p.stepString(b) 243 case stString: 244 b, done, err = p.stepString(b) 245 246 case stArray: 247 b, err = p.stepArrayInit(b) 248 case stArrayDyn: 249 b, done, err = p.stepArrayDyn(b) 250 case stArrayCount: 251 b, done, err = p.stepArrayCount(b) 252 case stArrayTyped: 253 b, done, err = p.stepArrayTyped(b) 254 255 case stObject: 256 b, err = p.stepObjectInit(b) 257 case stObjectDyn: 258 b, done, err = p.stepObjectDyn(b) 259 case stObjectCount: 260 b, done, err = p.stepObjectCount(b) 261 case stObjectTyped: 262 b, done, err = p.stepObjectTyped(b) 263 264 default: 265 err = errInvalidState 266 } 267 268 if err != nil { 269 p.err = err 270 } 271 272 return b, done, err 273 } 274 275 func (p *Parser) stepFixedValue(b []byte) ([]byte, bool, error) { 276 var ( 277 tmp []byte 278 err error 279 done bool 280 ) 281 282 switch p.state.current.stateStep { 283 case stNil: 284 done, err = true, p.visitor.OnNil() 285 case stNoop: 286 287 case stTrue: 288 done, err = true, p.visitor.OnBool(true) 289 case stFalse: 290 done, err = true, p.visitor.OnBool(false) 291 case stInt8: 292 b, done, err = b[1:], true, p.visitor.OnInt8(int8(b[0])) 293 case stUInt8: 294 b, done, err = b[1:], true, p.visitor.OnUint8(b[0]) 295 case stChar: 296 b, tmp = p.collect(b, 1) 297 if done = tmp != nil; done { 298 err = p.visitor.OnByte(tmp[0]) 299 } 300 case stInt16: 301 b, tmp = p.collect(b, 2) 302 if done = tmp != nil; done { 303 err = p.visitor.OnInt16(readInt16(tmp)) 304 } 305 case stInt32: 306 b, tmp = p.collect(b, 4) 307 if done = tmp != nil; done { 308 err = p.visitor.OnInt32(readInt32(tmp)) 309 } 310 case stInt64: 311 b, tmp = p.collect(b, 8) 312 if done = tmp != nil; done { 313 err = p.visitor.OnInt64(readInt64(tmp)) 314 } 315 case stFloat32: 316 b, tmp = p.collect(b, 4) 317 if done = tmp != nil; done { 318 err = p.visitor.OnFloat32(readFloat32(tmp)) 319 } 320 case stFloat64: 321 b, tmp = p.collect(b, 8) 322 if done = tmp != nil; done { 323 err = p.visitor.OnFloat64(readFloat64(tmp)) 324 } 325 default: 326 return b, false, err 327 } 328 329 if done && err == nil { 330 done, err = p.popState() 331 } 332 333 return b, done, err 334 } 335 336 func (p *Parser) stepString(b []byte) ([]byte, bool, error) { 337 var ( 338 err error 339 done bool 340 st = &p.state.current 341 ) 342 343 switch st.stateStep { 344 case stStart: 345 b, err = p.stepLen(b, st.withStep(stWithLen)) 346 if !(err == nil && st.stateStep == stWithLen) { 347 break 348 } 349 fallthrough 350 case stWithLen: 351 L := p.length.current 352 if L == 0 { 353 done = true 354 err = p.visitor.OnString("") 355 } else { 356 var tmp []byte 357 if b, tmp = p.collect(b, int(L)); tmp != nil { 358 done = true 359 err = p.strVisitor.OnStringRef(tmp) 360 } 361 } 362 } 363 364 if done { 365 done, err = p.popLenState() 366 } 367 return b, done, err 368 } 369 370 func (p *Parser) stepArrayInit(b []byte) ([]byte, error) { 371 var ( 372 err error 373 st = &p.state.current 374 ) 375 376 switch b[0] { 377 case countMarker: 378 b, st.stateType = b[1:], stArrayCount 379 case typeMarker: 380 b, st.stateType = b[1:], stArrayTyped 381 default: 382 st.stateType = stArrayDyn 383 err = p.visitor.OnArrayStart(-1, structform.AnyType) 384 } 385 386 return b, err 387 } 388 389 func (p *Parser) stepArrayDyn(b []byte) ([]byte, bool, error) { 390 if b[0] == arrEndMarker { 391 err := p.visitor.OnArrayFinished() 392 done := true 393 if err == nil { 394 done, err = p.popState() 395 } 396 return b[1:], done, err 397 } 398 399 if st := &p.state.current; st.stateStep == stStart { 400 st.stateStep = stCont // ensure continuation state is pushed to stack 401 b, _, err := p.stepValue(b) 402 return b, false, err 403 } 404 b, _, err := p.stepValue(b) 405 return b, false, err 406 } 407 408 func (p *Parser) stepArrayCount(b []byte) ([]byte, bool, error) { 409 var ( 410 st = &p.state.current 411 step = st.stateStep 412 ) 413 414 // parse array header 415 if step == stStart { 416 b, err := p.stepLen(b, st.withStep(stWithLen)) 417 return b, false, err 418 } 419 420 l := int(p.length.current) 421 if step == stWithLen { 422 p.state.current.stateStep = stCont 423 err := p.visitor.OnArrayStart(l, structform.AnyType) 424 if err != nil { 425 return b, false, err 426 } 427 428 } 429 430 if l == 0 { 431 err := p.visitor.OnArrayFinished() 432 done := true 433 if err == nil { 434 done, err = p.popLenState() 435 } 436 return b, done, err 437 } 438 439 p.length.current-- 440 b, _, err := p.stepValue(b) 441 return b, false, err 442 } 443 444 func (p *Parser) stepArrayTyped(b []byte) ([]byte, bool, error) { 445 step := p.state.current.stateStep 446 447 // parse typed array header 448 switch step { 449 case stStart, stWithType0, stWithType1: 450 b, err := p.stepTypeLenHeader(b, stWithLen) 451 return b, false, err 452 } 453 454 l := int(p.length.current) 455 if step == stWithLen { 456 p.state.current.stateStep = stCont 457 err := p.visitor.OnArrayStart(l, p.valueType) 458 if err != nil { 459 return b, false, err 460 } 461 } 462 463 if l == 0 { 464 err := p.visitor.OnArrayFinished() 465 done := true 466 if err == nil { 467 done, err = p.popLenState() 468 } 469 return b, done, err 470 } 471 472 p.length.current-- 473 vs := p.valueState.current 474 p.pushState(vs) 475 b, _, err := p.execStep(b) 476 return b, false, err 477 } 478 479 func (p *Parser) stepTypeLenHeader(b []byte, cont stateStep) ([]byte, error) { 480 st := p.state.current 481 step := st.stateStep 482 483 switch step { 484 case stStart: 485 return p.stepType(b, st.withStep(stWithType0)) 486 487 case stWithType0: 488 if b[0] != countMarker { 489 return b, errMissingCount 490 } 491 p.state.current = st.withStep(stWithType1) 492 return b[1:], nil 493 494 case stWithType1: 495 return p.stepLen(b, st.withStep(cont)) 496 497 default: 498 return b, nil 499 } 500 } 501 502 func (p *Parser) stepObjectInit(b []byte) ([]byte, error) { 503 var ( 504 st = &p.state.current 505 err error 506 ) 507 508 switch b[0] { 509 case countMarker: 510 b, st.stateType = b[1:], stObjectCount 511 case typeMarker: 512 b, st.stateType = b[1:], stObjectTyped 513 default: 514 st.stateType, err = stObjectDyn, p.visitor.OnObjectStart(-1, structform.AnyType) 515 } 516 517 return b, err 518 } 519 520 func (p *Parser) stepObjectDyn(b []byte) ([]byte, bool, error) { 521 var ( 522 err error 523 st = &p.state.current 524 step = st.stateStep 525 ) 526 527 if step == stStart { 528 if b[0] == objEndMarker { 529 err := p.visitor.OnObjectFinished() 530 done := true 531 if err == nil { 532 done, err = p.popState() 533 } 534 return b[1:], done, err 535 } 536 } 537 538 switch step { 539 case stStart: 540 b, err = p.stepLen(b, st.withStep(stFieldNameLen)) 541 case stFieldNameLen: 542 L := p.length.current 543 var tmp []byte 544 if b, tmp = p.collect(b, int(L)); tmp != nil { 545 p.popLen() 546 err = p.strVisitor.OnKeyRef(tmp) 547 } 548 st.stateStep = stCont 549 case stCont: 550 st.stateStep = stStart 551 b, _, err = p.stepValue(b) 552 } 553 554 return b, false, err 555 } 556 557 func (p *Parser) stepObjectCount(b []byte) ([]byte, bool, error) { 558 var ( 559 st = &p.state.current 560 step = st.stateStep 561 ) 562 563 if step == stStart { 564 b, err := p.stepLen(b, st.withStep(stWithLen)) 565 return b, false, err 566 } 567 568 done, b, err := p.stepObjectCountedContent(b, false) 569 if done { 570 done, err = p.popLenState() 571 } 572 return b, done, err 573 } 574 575 func (p *Parser) stepObjectTyped(b []byte) ([]byte, bool, error) { 576 st := &p.state.current 577 step := st.stateStep 578 579 switch step { 580 case stStart, stWithType0, stWithType1: 581 b, err := p.stepTypeLenHeader(b, stWithLen) 582 return b, false, err 583 } 584 585 done, b, err := p.stepObjectCountedContent(b, true) 586 if done { 587 p.valueState.pop() 588 done, err = p.popLenState() 589 } 590 return b, done, err 591 } 592 593 func (p *Parser) stepObjectCountedContent(b []byte, typed bool) (bool, []byte, error) { 594 var ( 595 err error 596 st = &p.state.current 597 step = st.stateStep 598 end = false 599 ) 600 601 switch step { 602 case stWithLen: 603 L := p.length.current 604 err := p.visitor.OnObjectStart(int(L), structform.AnyType) 605 if err != nil { 606 return end, b, err 607 } 608 609 if L == 0 { 610 end = p.length.current == 0 611 break 612 } 613 614 st.stateStep = stFieldName 615 fallthrough 616 617 case stFieldName: 618 end = p.length.current == 0 619 if end { 620 break 621 } 622 b, err = p.stepLen(b, st.withStep(stFieldNameLen)) 623 624 case stFieldNameLen: 625 L := p.length.current 626 var tmp []byte 627 if b, tmp = p.collect(b, int(L)); tmp != nil { 628 p.popLen() 629 err = p.strVisitor.OnKeyRef(tmp) 630 } 631 st.stateStep = stCont 632 633 case stCont: 634 p.length.current-- 635 st.stateStep = stFieldName 636 // handle object field value 637 if typed { 638 p.pushState(p.valueState.current) 639 } else { 640 b, _, err = p.stepValue(b) 641 } 642 } 643 644 if end { 645 err = p.visitor.OnObjectFinished() 646 } 647 return end, b, err 648 } 649 650 func (p *Parser) stepType(b []byte, cont state) ([]byte, error) { 651 marker := b[0] 652 b = b[1:] 653 p.state.current = cont 654 655 // TODO: analyze marker 656 state, err := markerToStartState(marker) 657 if err != nil { 658 return nil, err 659 } 660 p.valueState.push(state) 661 p.valueType = markerToBaseType(marker) 662 663 return b, nil 664 } 665 666 func (p *Parser) stepLen(b []byte, cont state) ([]byte, error) { 667 if p.marker == noMarker { 668 p.marker = b[0] 669 b = b[1:] 670 if len(b) == 0 { 671 return nil, nil 672 } 673 } 674 675 var tmp []byte 676 complete := false 677 L := int64(-1) 678 679 switch p.marker { 680 case int8Marker: 681 complete, L, b = true, int64(int8(b[0])), b[1:] 682 case uint8Marker: 683 complete, L, b = true, int64(b[0]), b[1:] 684 case int16Marker: 685 if b, tmp = p.collect(b, 2); tmp != nil { 686 complete, L = true, int64(readInt16(tmp)) 687 } 688 case int32Marker: 689 if b, tmp = p.collect(b, 4); tmp != nil { 690 complete, L = true, int64(readInt32(tmp)) 691 } 692 case int64Marker: 693 if b, tmp = p.collect(b, 8); tmp != nil { 694 complete, L = true, readInt64(tmp) 695 } 696 } 697 698 if !complete { 699 return b, nil 700 } 701 702 if L < 0 { 703 return nil, errNegativeLen 704 } 705 706 p.marker = noMarker 707 p.state.current = cont 708 p.pushLen(L) 709 return b, nil 710 } 711 712 func (p *Parser) collect(b []byte, count int) ([]byte, []byte) { 713 if len(p.buffer) > 0 { 714 delta := count - len(p.buffer) 715 if delta > 0 { 716 N := delta 717 complete := true 718 if N > len(b) { 719 complete = false 720 N = len(b) 721 } 722 723 p.buffer = append(p.buffer, b[:N]...) 724 if !complete { 725 return nil, nil 726 } 727 728 // advance read buffer 729 b = b[N:] 730 } 731 732 if len(p.buffer) >= count { 733 tmp := p.buffer[:count] 734 if len(p.buffer) == count { 735 p.buffer = p.buffer0[:0] 736 } else { 737 p.buffer = p.buffer[count:] 738 } 739 return b, tmp 740 } 741 } 742 743 if len(b) >= count { 744 return b[count:], b[:count] 745 } 746 747 p.buffer = append(p.buffer, b...) 748 return nil, nil 749 } 750 751 func (p *Parser) stepValue(b []byte) ([]byte, bool, error) { 752 state, err := markerToStartState(b[0]) 753 if err != nil { 754 return nil, false, err 755 } 756 757 done := true 758 switch state.stateStep { 759 case stNil: 760 b, err = b[1:], p.visitor.OnNil() 761 case stNoop: 762 done = false 763 b, err = b[1:], nil 764 case stTrue: 765 b, err = b[1:], p.visitor.OnBool(true) 766 case stFalse: 767 b, err = b[1:], p.visitor.OnBool(false) 768 default: 769 done = false 770 b, err = p.advanceMarker(state, b) 771 } 772 773 return b, done, err 774 } 775 776 func (p *Parser) advanceMarker(s state, b []byte) ([]byte, error) { 777 p.pushState(s) 778 return b[1:], nil 779 } 780 781 func (p *Parser) pushLen(l int64) { p.length.push(l) } 782 func (p *Parser) popLen() { p.length.pop() } 783 784 func (p *Parser) pushState(next state) { p.state.push(next) } 785 func (p *Parser) popState() (bool, error) { 786 p.state.pop() 787 return len(p.state.stack) == 0, nil 788 } 789 790 func (p *Parser) popLenState() (bool, error) { 791 p.popLen() 792 return p.popState() 793 } 794 795 func readInt16(b []byte) int16 { 796 return int16(binary.BigEndian.Uint16(b)) 797 } 798 799 func readInt32(b []byte) int32 { 800 return int32(binary.BigEndian.Uint32(b)) 801 } 802 803 func readInt64(b []byte) int64 { 804 return int64(binary.BigEndian.Uint64(b)) 805 } 806 807 func readFloat32(b []byte) float32 { 808 bits := binary.BigEndian.Uint32(b) 809 return math.Float32frombits(bits) 810 } 811 812 func readFloat64(b []byte) float64 { 813 bits := binary.BigEndian.Uint64(b) 814 return math.Float64frombits(bits) 815 } 816 817 func markerToStartState(marker byte) (state, error) { 818 switch marker { 819 case nullMarker: 820 return state{stFixed, stNil}, nil 821 case noopMarker: 822 return state{stFixed, stNoop}, nil 823 case trueMarker: 824 return state{stFixed, stTrue}, nil 825 case falseMarker: 826 return state{stFixed, stFalse}, nil 827 case int8Marker: 828 return state{stFixed, stInt8}, nil 829 case uint8Marker: 830 return state{stFixed, stUInt8}, nil 831 case int16Marker: 832 return state{stFixed, stInt16}, nil 833 case int32Marker: 834 return state{stFixed, stInt32}, nil 835 case int64Marker: 836 return state{stFixed, stInt64}, nil 837 case float32Marker: 838 return state{stFixed, stFloat32}, nil 839 case float64Marker: 840 return state{stFixed, stFloat64}, nil 841 case highPrecMarker: 842 return state{stHighPrec, stStart}, nil 843 case charMarker: 844 return state{stFixed, stChar}, nil 845 case stringMarker: 846 return state{stString, stStart}, nil 847 case objStartMarker: 848 return state{stObject, stStart}, nil 849 case arrStartMarker: 850 return state{stArray, stStart}, nil 851 default: 852 return state{stFail, stStart}, errUnknownMarker 853 } 854 } 855 856 func markerToBaseType(marker byte) structform.BaseType { 857 switch marker { 858 case falseMarker, trueMarker: 859 return structform.BoolType 860 case charMarker: 861 return structform.ByteType 862 case int8Marker: 863 return structform.Int8Type 864 case uint8Marker: 865 return structform.Uint8Type 866 case int16Marker: 867 return structform.Int16Type 868 case int32Marker: 869 return structform.Int32Type 870 case int64Marker: 871 return structform.Int64Type 872 case float32Marker: 873 return structform.Float32Type 874 case float64Marker: 875 return structform.Float64Type 876 case highPrecMarker, stringMarker: 877 return structform.StringType 878 default: 879 return structform.AnyType 880 } 881 } 882 883 func (st state) withStep(s stateStep) state { 884 st.stateStep = s 885 return st 886 }