github.com/urso/go-structform@v0.0.2/cborl/parse.go (about) 1 package cborl 2 3 import ( 4 "encoding/binary" 5 "io" 6 "math" 7 8 structform "github.com/urso/go-structform" 9 ) 10 11 type Parser struct { 12 visitor structform.Visitor 13 strVisitor structform.StringRefVisitor 14 15 // last fail state 16 err error 17 18 // parser state machine 19 state stateStack 20 21 length lengthStack 22 23 buffer []byte 24 buffer0 [64]byte 25 } 26 27 type state struct { 28 major uint8 29 minor uint8 30 } 31 32 // additional parser state 'major' types 33 const ( 34 stFail uint8 = 1 35 stValue uint8 = 2 36 stLen uint8 = 3 37 stStartX uint8 = 4 38 stIndef uint8 = 1 39 stStartArr uint8 = majorArr | stStartX 40 stStartMap uint8 = majorMap | stStartX 41 stStartIndefArr uint8 = majorArr | stStartX | stIndef 42 stStartIndefMap uint8 = majorMap | stStartX | stIndef 43 stKey uint8 = majorMap | 8 44 stElem uint8 = majorMap | 9 45 ) 46 47 const ( 48 stStart uint8 = iota + 1 49 stCont 50 ) 51 52 func NewParser(vs structform.Visitor) *Parser { 53 p := &Parser{} 54 p.init(vs) 55 return p 56 } 57 58 func ParseReader(in io.Reader, vs structform.Visitor) (int64, error) { 59 p := NewParser(vs) 60 i, err := io.Copy(p, in) 61 return i, err 62 } 63 64 func Parse(b []byte, vs structform.Visitor) error { 65 return NewParser(vs).Parse(b) 66 } 67 68 func ParseString(str string, vs structform.Visitor) error { 69 return NewParser(vs).ParseString(str) 70 } 71 72 func (p *Parser) init(vs structform.Visitor) { 73 *p = Parser{ 74 visitor: vs, 75 strVisitor: structform.MakeStringRefVisitor(vs), 76 } 77 p.buffer = p.buffer0[:0] 78 p.length.init() 79 p.state.init(state{stValue, stStart}) 80 } 81 82 func (p *Parser) Write(b []byte) (int, error) { 83 p.err = p.feed(b) 84 if p.err != nil { 85 return 0, p.err 86 } 87 return len(b), nil 88 } 89 90 func (p *Parser) ParseString(str string) error { 91 return p.Parse(str2Bytes(str)) 92 } 93 94 func (p *Parser) Parse(b []byte) error { 95 return p.feed(b) 96 } 97 98 func (p *Parser) feed(b []byte) error { 99 for len(b) > 0 { 100 n, _, err := p.feedUntil(b) 101 if err != nil { 102 return err 103 } 104 105 b = b[n:] 106 } 107 108 return nil 109 } 110 111 func (p *Parser) feedUntil(b []byte) (int, bool, error) { 112 var ( 113 orig = b 114 done bool 115 err error 116 ) 117 118 for { 119 b, done, err = p.execStep(b) 120 if done || err != nil { 121 break 122 } 123 124 // continue parsing if input buffer is not empty, or structure with length 125 // fields must be initialized 126 // -> structures with length 0 will be reported immediately 127 contParse := len(b) != 0 || 128 (p.state.current.major&(stStartX|stIndef)) == stStartX 129 if !contParse { 130 break 131 } 132 } 133 return len(orig) - len(b), done, err 134 } 135 136 func (p *Parser) execStep(b []byte) ([]byte, bool, error) { 137 var ( 138 err error 139 done bool 140 ) 141 142 switch p.state.current.major { 143 case stFail: 144 return b, false, p.err 145 case stValue: 146 b, done, err = p.stepValue(b) 147 148 case stLen: 149 b = p.stepLen(b) 150 case majorUint: 151 b, done, err = p.stepUint(b) 152 case majorNeg: 153 b, done, err = p.stepNeg(b) 154 case codeSingleFloat: 155 b, done, err = p.stepSingleFloat(b) 156 case codeDoubleFloat: 157 b, done, err = p.stepDoubleFloat(b) 158 159 case majorBytes | stStartX: 160 if p.length.current == 0 { 161 err = p.visitor.OnArrayStart(0, structform.ByteType) 162 if err == nil { 163 err = p.visitor.OnArrayFinished() 164 p.length.pop() 165 if err == nil { 166 done, err = p.popState() 167 } 168 } 169 170 break 171 } 172 173 p.state.current.major &= ^stStartX 174 if len(b) == 0 { 175 break 176 } 177 fallthrough 178 case majorBytes: 179 b, done, err = p.stepBytes(b) 180 181 case majorText | stStartX: 182 if p.length.current == 0 { 183 p.length.pop() 184 err = p.visitor.OnString("") 185 if err == nil { 186 done, err = p.popState() 187 } 188 break 189 } 190 191 p.state.current.major &= ^stStartX 192 if len(b) == 0 { 193 break 194 } 195 fallthrough 196 case majorText: 197 b, done, err = p.stepText(b) 198 199 case stStartArr: 200 err = p.visitor.OnArrayStart(int(p.length.current), structform.AnyType) 201 if err != nil { 202 break 203 } 204 p.state.pop() 205 fallthrough 206 case majorArr: 207 b, done, err = p.stepArray(b) 208 209 case stStartIndefArr: 210 err = p.visitor.OnArrayStart(-1, structform.AnyType) 211 if err != nil { 212 break 213 } 214 p.state.pop() 215 fallthrough 216 case majorArr | stIndef: 217 if b[0] == codeBreak { 218 b = b[1:] 219 err = p.visitor.OnArrayFinished() 220 if err == nil { 221 done, err = p.popState() 222 } 223 } else { 224 b, done, err = p.stepValue(b) 225 } 226 227 case stStartMap: 228 err = p.visitor.OnObjectStart(int(p.length.current), structform.AnyType) 229 if err != nil { 230 break 231 } 232 p.state.pop() 233 fallthrough 234 case majorMap: 235 b, done, err = p.stepMap(b) 236 case stStartIndefMap: 237 err = p.visitor.OnObjectStart(-1, structform.AnyType) 238 if err != nil { 239 break 240 } 241 p.state.pop() 242 fallthrough 243 case majorMap | stIndef: 244 if b[0] == codeBreak { 245 err = p.visitor.OnObjectFinished() 246 b = b[1:] 247 if err == nil { 248 done, err = p.popState() 249 } 250 } else { 251 b, done, err = p.initMapKey(b) 252 } 253 case stKey | stStartX: 254 if p.length.current == 0 { 255 err = errEmptyKey 256 break 257 } 258 259 p.state.current.major &= (^stStartX) 260 fallthrough 261 case stKey: 262 b, done, err = p.stepKey(b) 263 case stElem: 264 p.state.pop() 265 b, done, err = p.stepValue(b) 266 267 default: 268 err = errTODO() 269 } 270 271 return b, done, err 272 } 273 274 func (p *Parser) popState() (bool, error) { 275 p.state.pop() 276 return p.onValue() 277 } 278 279 func (p *Parser) onValue() (bool, error) { 280 switch p.state.current.major { 281 case majorArr: 282 p.length.current-- 283 _, done, err := p.arrayHandleLen() 284 return done, err 285 286 case majorMap: 287 p.length.current-- 288 _, done, err := p.mapHandleLen() 289 return done, err 290 291 case majorArr | stIndef, majorMap | stIndef: 292 return false, nil 293 } 294 return true, nil 295 } 296 297 func (p *Parser) stepValue(b []byte) ([]byte, bool, error) { 298 if len(b) == 0 { 299 return b, false, nil 300 } 301 302 major := b[0] & majorMask 303 switch major { 304 case majorUint: 305 if b[0] < len8b { 306 err := p.visitor.OnUint8(b[0]) 307 done := false 308 if err == nil { 309 done, err = p.onValue() 310 } 311 return b[1:], done, err 312 } 313 314 p.state.push(state{major, b[0] & minorMask}) 315 return b[1:], false, nil 316 317 case majorNeg: 318 minor := b[0] & minorMask 319 if v := minor; v < len8b { 320 err := p.visitor.OnInt8(int8(^v)) 321 done := false 322 if err == nil { 323 done, err = p.onValue() 324 } 325 return b[1:], done, err 326 } 327 328 p.state.push(state{major, minor}) 329 return b[1:], false, nil 330 331 case majorBytes, majorText: 332 minor := b[0] & minorMask 333 if minor == lenIndef { 334 return nil, false, errIndefByteSeq 335 } else { 336 return p.initByteSeq(major, minor, b[1:]) 337 } 338 339 case majorArr, majorMap: 340 minor := b[0] & minorMask 341 return p.initSub(major, minor, b[1:]) 342 343 case majorTag: 344 return nil, false, errTODO() 345 346 default: 347 var ( 348 err error 349 done bool 350 ) 351 352 switch b[0] { 353 case codeFalse: 354 err = p.visitor.OnBool(false) 355 if err == nil { 356 done, err = p.onValue() 357 } 358 return b[1:], done, err 359 case codeTrue: 360 err = p.visitor.OnBool(true) 361 if err == nil { 362 done, err = p.onValue() 363 } 364 return b[1:], done, err 365 case codeNull, codeUndef: 366 err = p.visitor.OnNil() 367 if err == nil { 368 done, err = p.onValue() 369 } 370 return b[1:], done, err 371 case codeHalfFloat: 372 return b[1:], false, errTODO() 373 case codeSingleFloat, codeDoubleFloat: 374 p.state.push(state{b[0], stStart}) 375 return b[1:], false, nil 376 } 377 } 378 return nil, false, errInvalidCode 379 } 380 381 func (p *Parser) stepUint(in []byte) (b []byte, done bool, err error) { 382 b = in 383 switch p.state.current.minor { 384 case len8b: 385 b, done, err = b[1:], true, p.visitor.OnUint8(b[0]) 386 case len16b: 387 var v uint16 388 if b, done, v = p.getUint16(b); done { 389 err = p.visitor.OnUint16(v) 390 } 391 case len32b: 392 var v uint32 393 if b, done, v = p.getUint32(b); done { 394 err = p.visitor.OnUint32(v) 395 } 396 case len64b: 397 var v uint64 398 if b, done, v = p.getUint64(b); done { 399 err = p.visitor.OnUint64(v) 400 } 401 } 402 403 if done && err == nil { 404 done, err = p.popState() 405 } 406 407 return 408 } 409 410 func (p *Parser) stepBytes(b []byte) ([]byte, bool, error) { 411 // stream raw bytes via array visitor 412 413 var ( 414 st = &p.state.current 415 err error 416 ) 417 418 if st.minor == stStart { 419 err = p.visitor.OnArrayStart(int(p.length.current), structform.ByteType) 420 if err != nil { 421 return nil, false, err 422 } 423 st.minor = stCont 424 } 425 426 L := int(p.length.current) 427 done := len(b) >= L 428 if !done { 429 L = len(b) 430 p.length.current -= int64(L) 431 } 432 433 for _, c := range b[:L] { 434 if err := p.visitor.OnByte(c); err != nil { 435 return nil, false, err 436 } 437 } 438 439 b = b[L:] 440 if done { 441 err = p.visitor.OnArrayFinished() 442 p.length.pop() 443 if err == nil { 444 done, err = p.popState() 445 } 446 } 447 return b, done, err 448 } 449 450 func (p *Parser) stepText(b []byte) ([]byte, bool, error) { 451 b, tmp := p.collect(b, int(p.length.current)) 452 if tmp == nil { 453 return nil, false, nil 454 } 455 456 p.length.pop() 457 458 done := true 459 err := p.strVisitor.OnStringRef(tmp) 460 if err == nil { 461 done, err = p.popState() 462 } 463 return b, done, err 464 } 465 466 func (p *Parser) stepArray(b []byte) ([]byte, bool, error) { 467 val, done, err := p.arrayHandleLen() 468 if val { 469 b, done, err = p.stepValue(b) 470 } 471 return b, done, err 472 } 473 474 func (p *Parser) arrayHandleLen() (value, done bool, err error) { 475 if p.length.current > 0 { 476 return true, false, nil 477 } 478 479 err = p.visitor.OnArrayFinished() 480 if err == nil { 481 p.length.pop() 482 done, err = p.popState() 483 } 484 485 return false, done, err 486 } 487 488 func (p *Parser) stepMap(b []byte) ([]byte, bool, error) { 489 kv, done, err := p.mapHandleLen() 490 if kv && len(b) > 0 { 491 b, done, err = p.initMapKey(b) 492 } 493 return b, done, err 494 } 495 496 func (p *Parser) mapHandleLen() (kv, done bool, err error) { 497 if p.length.current > 0 { 498 return true, false, nil 499 } 500 501 err = p.visitor.OnObjectFinished() 502 if err == nil { 503 p.length.pop() 504 done, err = p.popState() 505 } 506 return false, done, err 507 } 508 509 func (p *Parser) initMapKey(b []byte) ([]byte, bool, error) { 510 // parse key: 511 major := b[0] & majorMask 512 if major != majorText { 513 return nil, false, errTextKeyRequired 514 } 515 516 minor := b[0] & minorMask 517 if minor == lenIndef { 518 return nil, false, errIndefByteSeq 519 } 520 521 return p.initByteSeq(stKey, minor, b[1:]) 522 } 523 524 func (p *Parser) stepKey(b []byte) ([]byte, bool, error) { 525 b, tmp := p.collect(b, int(p.length.current)) 526 if tmp == nil { 527 return nil, false, nil 528 } 529 530 err := p.strVisitor.OnKeyRef(tmp) 531 if err == nil { 532 p.length.pop() 533 p.state.current.major = stElem 534 } 535 return b, false, err 536 } 537 538 func (p *Parser) initByteSeq(major, minor uint8, b []byte) ([]byte, bool, error) { 539 if v := minor; v < len8b { 540 p.state.push(state{major | stStartX, stStart}) 541 p.length.push(int64(v)) 542 return b, false, nil 543 } 544 545 p.state.push(state{major | stStartX, stStart}) 546 p.state.push(state{stLen, minor}) 547 return b, false, nil 548 } 549 550 func (p *Parser) initSub(major, minor uint8, b []byte) ([]byte, bool, error) { 551 if minor == lenIndef { 552 // TODO: replace 2 state pushes with 1 state push + mask removing startX from current state 553 p.state.push(state{major | stIndef, stStart}) 554 p.state.push(state{major | stStartX | stIndef, stStart}) 555 return b, false, nil 556 } 557 558 if v := minor; v < len8b { 559 p.state.push(state{major, stStart}) 560 p.state.push(state{major | stStartX, stStart}) 561 p.length.push(int64(v)) 562 return b, false, nil 563 } 564 565 p.state.push(state{major, stStart}) 566 p.state.push(state{major | stStartX, stStart}) 567 p.state.push(state{stLen, minor}) 568 return b, false, nil 569 } 570 571 func (p *Parser) stepLen(b []byte) []byte { 572 var done bool 573 574 switch p.state.current.minor { 575 case len8b: 576 p.length.push(int64(b[0])) 577 b, done = b[1:], true 578 case len16b: 579 var v uint16 580 if b, done, v = p.getUint16(b); done { 581 p.length.push(int64(v)) 582 } 583 case len32b: 584 var v uint32 585 if b, done, v = p.getUint32(b); done { 586 p.length.push(int64(v)) 587 } 588 589 case len64b: 590 var v uint64 591 if b, done, v = p.getUint64(b); done { 592 p.length.push(int64(v)) 593 } 594 } 595 596 if done { 597 p.state.pop() 598 } 599 return b 600 } 601 602 func (p *Parser) stepNeg(in []byte) (b []byte, done bool, err error) { 603 b = in 604 switch p.state.current.minor { 605 case len8b: 606 b, done, err = b[1:], true, p.visitor.OnInt8(int8(^b[0])) 607 case len16b: 608 var v uint16 609 if b, done, v = p.getUint16(b); done { 610 err = p.visitor.OnInt16(int16(^v)) 611 } 612 case len32b: 613 var v uint32 614 if b, done, v = p.getUint32(b); done { 615 err = p.visitor.OnInt32(int32(^v)) 616 } 617 case len64b: 618 var v uint64 619 if b, done, v = p.getUint64(b); done { 620 err = p.visitor.OnInt64(int64(^v)) 621 } 622 } 623 624 if done && err == nil { 625 done, err = p.popState() 626 } 627 return 628 } 629 630 func (p *Parser) stepSingleFloat(in []byte) (b []byte, done bool, err error) { 631 var tmp uint32 632 if b, done, tmp = p.getUint32(in); done { 633 err = p.visitor.OnFloat32(math.Float32frombits(tmp)) 634 if err == nil { 635 done, err = p.popState() 636 } 637 } 638 return 639 } 640 641 func (p *Parser) stepDoubleFloat(in []byte) (b []byte, done bool, err error) { 642 var tmp uint64 643 if b, done, tmp = p.getUint64(in); done { 644 err = p.visitor.OnFloat64(math.Float64frombits(tmp)) 645 if err == nil { 646 done, err = p.popState() 647 } 648 } 649 return 650 } 651 652 func (p *Parser) getUint8(b []byte) ([]byte, bool, uint8) { 653 return b[1:], true, b[0] 654 } 655 656 func (p *Parser) getUint16(b []byte) ([]byte, bool, uint16) { 657 b, tmp := p.collect(b, 2) 658 if tmp == nil { 659 return nil, false, 0 660 } 661 return b, true, binary.BigEndian.Uint16(tmp) 662 } 663 664 func (p *Parser) getUint32(b []byte) ([]byte, bool, uint32) { 665 b, tmp := p.collect(b, 4) 666 if tmp == nil { 667 return b, false, 0 668 } 669 670 return b, true, binary.BigEndian.Uint32(tmp) 671 } 672 673 func (p *Parser) getUint64(b []byte) ([]byte, bool, uint64) { 674 b, tmp := p.collect(b, 8) 675 if tmp == nil { 676 return nil, false, 0 677 } 678 return b, true, binary.BigEndian.Uint64(tmp) 679 } 680 681 func (p *Parser) collect(b []byte, count int) ([]byte, []byte) { 682 if len(p.buffer) > 0 { 683 delta := count - len(p.buffer) 684 if delta > 0 { 685 N := delta 686 complete := true 687 if N > len(b) { 688 complete = false 689 N = len(b) 690 } 691 692 p.buffer = append(p.buffer, b[:N]...) 693 if !complete { 694 return nil, nil 695 } 696 697 // advance read buffer 698 b = b[N:] 699 } 700 701 if len(p.buffer) >= count { 702 tmp := p.buffer[:count] 703 if len(p.buffer) == count { 704 p.buffer = p.buffer0[:0] 705 } else { 706 p.buffer = p.buffer[count:] 707 } 708 return b, tmp 709 } 710 } 711 712 if len(b) >= count { 713 return b[count:], b[:count] 714 } 715 716 p.buffer = append(p.buffer, b...) 717 return nil, nil 718 } 719 720 func numBytes(code uint8) uint8 { 721 return 1 << ((code & minorMask) - len8b) 722 } 723 724 func readInt16(b []byte) int16 { return int16(^readUint16(b)) } 725 func readInt32(b []byte) int32 { return int32(^readUint32(b)) } 726 func readInt64(b []byte) int64 { return int64(^readUint64(b)) } 727 728 func readUint16(b []byte) uint16 { return binary.BigEndian.Uint16(b) } 729 func readUint32(b []byte) uint32 { return binary.BigEndian.Uint32(b) } 730 func readUint64(b []byte) uint64 { return binary.BigEndian.Uint64(b) }