github.com/intel-go/fastjson@v0.0.0-20170329170629-f846ae58a1ab/scanner.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package fastjson 6 7 // JSON value parser state machine. 8 // Just about at the limit of what is reasonable to write by hand. 9 // Some parts are a bit tedious, but overall it nicely factors out the 10 // otherwise common code from the multiple scanning functions 11 // in this package (Compact, Indent, checkValid, nextValue, etc). 12 // 13 // This file starts with two simple examples using the scanner 14 // before diving into the scanner itself. 15 16 import ( 17 "bytes" 18 "strconv" 19 ) 20 21 // checkValid verifies that data is valid JSON-encoded data. 22 // scan is passed in for use by checkValid to avoid an allocation. 23 func checkValid(data []byte, scan *scanner) error { 24 scan.length_data = len(data) 25 scan.reset() 26 scan.endTop = true 27 stream := streamByte{data: &data, pos: 0} 28 op := scan.parseValue(&stream) 29 30 if op == scanError { 31 return scan.err 32 } 33 34 return nil 35 } 36 37 // A SyntaxError is a description of a JSON syntax error. 38 type SyntaxError struct { 39 msg string // description of error 40 Offset int64 // error occurred after reading Offset bytes 41 } 42 43 type Record struct { 44 state int 45 pos int 46 } 47 48 func (e *SyntaxError) Error() string { return e.msg } 49 50 // A scanner is a JSON scanning state machine. 51 // Callers call scan.reset() and then pass bytes in one at a time 52 // by calling scan.step(&scan, c) for each byte. 53 // The return value, referred to as an opcode, tells the 54 // caller about significant parsing events like beginning 55 // and ending literals, objects, and arrays, so that the 56 // caller can follow along if it wishes. 57 // The return value scanEnd indicates that a single top-level 58 // JSON value has been completed, *before* the byte that 59 // just got passed in. (The indication must be delayed in order 60 // to recognize the end of numbers: is 123 a whole value or 61 // the beginning of 12345e+6?). 62 type scanner struct { 63 // The step is a func to be called to execute the next transition. 64 // Also tried using an integer constant and a single func 65 // with a switch, but using the func directly was 10% faster 66 // on a 64-bit Mac Mini, and it's nicer to read. 67 step func(*scanner, byte) int 68 69 // Reached end of top-level value. 70 endTop bool 71 72 // Stack of what we're in the middle of - array values, object keys, object values. 73 parseState []int 74 75 // Error that happened, if any. 76 err error 77 78 stateRecord []Record//array of records of labels(position in array and state on this position) 79 cacheRecord Record 80 cached bool 81 readPos int //position in array stateRecord during filling 82 length_data int //length of data to read, initialized in unmarshal. Helps to set correct capacity of stateRecord 83 inNumber bool // flag of parsing figure 84 endLiteral bool //flag of finishing literal 85 86 bytes int64 // total bytes consumed, updated by decoder.Decode 87 } 88 89 // These values are returned by the state transition functions 90 // assigned to scanner.state and the method scanner.eof. 91 // They give details about the current state of the scan that 92 // callers might be interested to know about. 93 // It is okay to ignore the return value of any particular 94 // call to scanner.state: if one call returns scanError, 95 // every subsequent call will return scanError too. 96 const ( 97 scanBeginLiteral = iota // end implied by next result != scanContinue 98 scanEndLiteral // not returned by scanner, but clearer for state recording 99 scanBeginObject // begin object 100 scanEndObject // end object (implies scanObjectValue if possible) 101 scanBeginArray // begin array 102 scanEndArray // end array (implies scanArrayValue if possible) 103 scanObjectKey // just finished object key (string) 104 scanObjectValue // just finished non-last object value 105 scanContinue // uninteresting byte 106 scanArrayValue // just finished array value 107 108 scanSkipSpace // space byte; can skip; known to be last "continue" result 109 110 // Stop. 111 scanEnd // top-level value ended *before* this byte; known to be first "stop" result 112 scanError // hit an error, scanner.err. 113 ) 114 115 // These values are stored in the parseState stack. 116 // They give the current state of a composite value 117 // being scanned. If the parser is inside a nested value 118 // the parseState describes the nested state, outermost at entry 0. 119 const ( 120 parseObjectKey = iota // parsing object key (before colon) 121 parseObjectValue // parsing object value (after colon) 122 parseArrayValue // parsing array value 123 ) 124 125 type streamByte struct { 126 data *[]byte 127 pos int 128 } 129 130 func (s *streamByte) isEnd() bool { 131 return s.pos >= len(*s.data) 132 } 133 134 func (s *streamByte) Take() byte { 135 result := s.Peek() 136 s.pos++ 137 return result 138 } 139 140 func (s *streamByte) Peek() byte { 141 if !s.isEnd() { 142 return (*s.data)[s.pos] 143 } else { 144 return 0//I have to leave this case, because I can call Peek, when the stream is over. I won't use value it returns, but it should be protected 145 } 146 } 147 148 func (sb *streamByte) skipSpaces() { 149 for c := sb.Peek(); c <= ' ' && isSpace(c); { 150 sb.pos++ 151 c = sb.Peek() 152 } 153 } 154 155 const AVERAGE_LENGTH = 10000 156 157 // reset prepares the scanner for use. 158 // It must be called before calling s.step. 159 func (s *scanner) reset() { 160 s.step = stateBeginValue 161 s.parseState = s.parseState[0:0] 162 s.err = nil 163 if s.isRecordEmpty() { 164 if s.length_data >= AVERAGE_LENGTH { 165 s.stateRecord = make([]Record, 0, s.length_data/4) //capacity doesn't depends on the length whole value, but on the length of nested values. But predictively the large values have large nested values. 166 } else { 167 s.stateRecord = make([]Record, 0, s.length_data/2) 168 } 169 } 170 s.inNumber = false 171 s.endLiteral = false 172 s.cached = false 173 s.readPos = 0 174 s.endTop = false 175 } 176 177 // eof tells the scanner that the end of input has been reached. 178 // It returns a scan status just as s.step does. 179 func (s *scanner) eof() int { 180 if s.err != nil { 181 return scanError 182 } 183 if s.endTop { 184 return scanEnd 185 } 186 s.step(s, ' ') 187 if s.endTop { 188 return scanEnd 189 } 190 if s.err == nil { 191 s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} 192 } 193 return scanError 194 } 195 196 // pushParseState pushes a new parse state p onto the parse stack. 197 func (s *scanner) pushParseState(p int) { 198 s.parseState = append(s.parseState, p) 199 } 200 201 // popParseState pops a parse state (already obtained) off the stack 202 // and updates s.step accordingly. 203 func (s *scanner) popParseState() { 204 n := len(s.parseState) - 1 205 s.parseState = s.parseState[0:n] 206 if n == 0 { 207 s.step = stateEndTop 208 s.endTop = true 209 } else { 210 s.step = stateEndValue 211 } 212 } 213 214 //checks if array of records is empty 215 func (s *scanner) isRecordEmpty() bool { 216 return len(s.stateRecord) == 0 217 } 218 219 //pushes Record into array 220 func (s *scanner) pushRecord(state, pos int) { 221 s.stateRecord = append(s.stateRecord, Record{state:state, pos:pos}) //state are at even positions, pos are at odd positions in stateRecord array 222 } 223 224 //peeks current state for filling object. Doesn't change position. Returns state, pos 225 func (s *scanner) peekPos() int { 226 if s.readPos >= len(s.stateRecord){ 227 return s.cacheRecord.pos// peek can be called when the array is over , only if unmarshal error occured, so return last read position 228 } 229 if !s.cached { 230 s.cached = true 231 s.cacheRecord = s.stateRecord[s.readPos] 232 } 233 return s.cacheRecord.pos 234 } 235 236 func (s *scanner) peekState() int { 237 if s.readPos >= len(s.stateRecord) { 238 return s.cacheRecord.state // the same as Peek 239 } 240 if !s.cached { 241 s.cached = true 242 s.cacheRecord = s.stateRecord[s.readPos] 243 } 244 return s.cacheRecord.state 245 } 246 247 //takes current state and increments reading position. 248 func (s *scanner) takeState() int { 249 if s.cached { 250 s.cached = false 251 }else{ 252 s.peekState() 253 } 254 s.readPos += 1 255 return s.cacheRecord.state 256 } 257 258 func (s *scanner) takePos() int { 259 if s.cached { 260 s.cached = false 261 }else{ 262 s.peekState() 263 } 264 s.readPos += 1 265 return s.cacheRecord.pos 266 } 267 268 func (s *scanner) skipRecord() { 269 s.readPos += 1 270 s.cached = false 271 } 272 273 //checks if we need this state to be recorded 274 func (s *scanner) isNeededState(state int) bool { 275 if s.endLiteral { 276 return true 277 } 278 if state > scanEndArray || state < scanBeginLiteral { 279 return false 280 } 281 return true 282 } 283 284 func (s *scanner) fillRecord(pos, state int) { 285 286 if s.isNeededState(state) { 287 if s.inNumber && s.endLiteral { // in case 2] , 2} or 2, 288 s.inNumber = false 289 s.endLiteral = false 290 s.pushRecord(scanEndLiteral, pos-1) 291 if s.isNeededState(state) { // in case 2] or 2} 292 s.pushRecord(state, pos) 293 } 294 return 295 } 296 297 if s.endLiteral { 298 s.endLiteral = false 299 state = scanEndLiteral 300 } 301 s.pushRecord(state, pos) 302 } 303 304 } 305 306 func isSpace(c byte) bool { 307 return c == ' ' || c == '\t' || c == '\r' || c == '\n' 308 } 309 310 // stateBeginValueOrEmpty is the state after reading `[`. 311 func stateBeginValueOrEmpty(s *scanner, c byte) int { 312 if c <= ' ' && isSpace(c) { 313 return scanSkipSpace 314 } 315 if c == ']' { 316 return stateEndValue(s, c) 317 } 318 return stateBeginValue(s, c) 319 } 320 321 // stateBeginValue is the state at the beginning of the input. 322 func stateBeginValue(s *scanner, c byte) int { 323 if c <= ' ' && isSpace(c) { 324 return scanSkipSpace 325 } 326 switch c { 327 case '{': 328 s.step = stateBeginStringOrEmpty 329 s.pushParseState(parseObjectKey) 330 return scanBeginObject 331 case '[': 332 s.step = stateBeginValueOrEmpty 333 s.pushParseState(parseArrayValue) 334 return scanBeginArray 335 case '"': 336 s.step = stateInString 337 return scanBeginLiteral 338 case '-': 339 s.step = stateNeg 340 s.inNumber = true 341 return scanBeginLiteral 342 case '0': // beginning of 0.123 343 s.step = state0 344 s.inNumber = true 345 return scanBeginLiteral 346 case 't': // beginning of true 347 s.step = stateT 348 return scanBeginLiteral 349 case 'f': // beginning of false 350 s.step = stateF 351 return scanBeginLiteral 352 case 'n': // beginning of null 353 s.step = stateN 354 return scanBeginLiteral 355 } 356 if '1' <= c && c <= '9' { // beginning of 1234.5 357 s.step = state1 358 s.inNumber = true 359 return scanBeginLiteral 360 } 361 return s.error(c, "looking for beginning of value") 362 } 363 364 // stateBeginStringOrEmpty is the state after reading `{`. 365 func stateBeginStringOrEmpty(s *scanner, c byte) int { 366 if c <= ' ' && isSpace(c) { 367 return scanSkipSpace 368 } 369 if c == '}' { 370 n := len(s.parseState) 371 s.parseState[n-1] = parseObjectValue 372 return stateEndValue(s, c) 373 } 374 return stateBeginString(s, c) 375 } 376 377 // stateBeginString is the state after reading `{"key": value,`. 378 func stateBeginString(s *scanner, c byte) int { 379 if c <= ' ' && isSpace(c) { 380 return scanSkipSpace 381 } 382 if c == '"' { 383 s.step = stateInString 384 return scanBeginLiteral 385 } 386 return s.error(c, "looking for beginning of object key string") 387 } 388 389 // stateEndValue is the state after completing a value, 390 // such as after reading `{}` or `true` or `["x"`. 391 func stateEndValue(s *scanner, c byte) int { 392 n := len(s.parseState) 393 if n == 0 { 394 // Completed top-level before the current byte. 395 s.step = stateEndTop 396 s.endTop = true 397 return stateEndTop(s, c) 398 } 399 if c <= ' ' && isSpace(c) { 400 s.step = stateEndValue 401 return scanSkipSpace 402 } 403 ps := s.parseState[n-1] 404 switch ps { 405 case parseObjectKey: 406 if c == ':' { 407 s.parseState[n-1] = parseObjectValue 408 s.step = stateBeginValue 409 return scanObjectKey 410 } 411 return s.error(c, "after object key") 412 case parseObjectValue: 413 if c == ',' { 414 s.parseState[n-1] = parseObjectKey 415 s.step = stateBeginString 416 return scanObjectValue 417 } 418 if c == '}' { 419 s.popParseState() 420 return scanEndObject 421 } 422 return s.error(c, "after object key:value pair") 423 case parseArrayValue: 424 if c == ',' { 425 s.step = stateBeginValue 426 return scanArrayValue 427 } 428 if c == ']' { 429 s.popParseState() 430 return scanEndArray 431 } 432 return s.error(c, "after array element") 433 } 434 return s.error(c, "") 435 } 436 437 // stateEndTop is the state after finishing the top-level value, 438 // such as after reading `{}` or `[1,2,3]`. 439 // Only space characters should be seen now. 440 func stateEndTop(s *scanner, c byte) int { 441 if c != ' ' && c != '\t' && c != '\r' && c != '\n' { 442 // Complain about non-space byte on next call. 443 s.error(c, "after top-level value") 444 } 445 return scanEnd 446 } 447 448 // stateInString is the state after reading `"`. 449 func stateInString(s *scanner, c byte) int { 450 if c == '"' { 451 s.step = stateEndValue 452 s.endLiteral = true 453 return scanContinue 454 } 455 if c == '\\' { 456 s.step = stateInStringEsc 457 return scanContinue 458 } 459 if c < 0x20 { 460 return s.error(c, "in string literal") 461 } 462 return scanContinue 463 } 464 465 // stateInStringEsc is the state after reading `"\` during a quoted string. 466 func stateInStringEsc(s *scanner, c byte) int { 467 switch c { 468 case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': 469 s.step = stateInString 470 return scanContinue 471 case 'u': 472 s.step = stateInStringEscU 473 return scanContinue 474 } 475 return s.error(c, "in string escape code") 476 } 477 478 // stateInStringEscU is the state after reading `"\u` during a quoted string. 479 func stateInStringEscU(s *scanner, c byte) int { 480 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 481 s.step = stateInStringEscU1 482 return scanContinue 483 } 484 // numbers 485 return s.error(c, "in \\u hexadecimal character escape") 486 } 487 488 // stateInStringEscU1 is the state after reading `"\u1` during a quoted string. 489 func stateInStringEscU1(s *scanner, c byte) int { 490 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 491 s.step = stateInStringEscU12 492 return scanContinue 493 } 494 // numbers 495 return s.error(c, "in \\u hexadecimal character escape") 496 } 497 498 // stateInStringEscU12 is the state after reading `"\u12` during a quoted string. 499 func stateInStringEscU12(s *scanner, c byte) int { 500 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 501 s.step = stateInStringEscU123 502 return scanContinue 503 } 504 // numbers 505 return s.error(c, "in \\u hexadecimal character escape") 506 } 507 508 // stateInStringEscU123 is the state after reading `"\u123` during a quoted string. 509 func stateInStringEscU123(s *scanner, c byte) int { 510 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 511 s.step = stateInString 512 return scanContinue 513 } 514 // numbers 515 return s.error(c, "in \\u hexadecimal character escape") 516 } 517 518 // stateNeg is the state after reading `-` during a number. 519 func stateNeg(s *scanner, c byte) int { 520 if c == '0' { 521 s.step = state0 522 return scanContinue 523 } 524 if '1' <= c && c <= '9' { 525 s.step = state1 526 return scanContinue 527 } 528 return s.error(c, "in numeric literal") 529 } 530 531 // state1 is the state after reading a non-zero integer during a number, 532 // such as after reading `1` or `100` but not `0`. 533 func state1(s *scanner, c byte) int { 534 if '0' <= c && c <= '9' { 535 s.step = state1 536 return scanContinue 537 } 538 return state0(s, c) 539 } 540 541 // state0 is the state after reading `0` during a number. 542 func state0(s *scanner, c byte) int { 543 if c == '.' { 544 s.step = stateDot 545 return scanContinue 546 } 547 if c == 'e' || c == 'E' { 548 s.step = stateE 549 return scanContinue 550 } 551 s.endLiteral = true 552 return stateEndValue(s, c) 553 } 554 555 // stateDot is the state after reading the integer and decimal point in a number, 556 // such as after reading `1.`. 557 func stateDot(s *scanner, c byte) int { 558 if '0' <= c && c <= '9' { 559 s.step = stateDot0 560 return scanContinue 561 } 562 return s.error(c, "after decimal point in numeric literal") 563 } 564 565 // stateDot0 is the state after reading the integer, decimal point, and subsequent 566 // digits of a number, such as after reading `3.14`. 567 func stateDot0(s *scanner, c byte) int { 568 if '0' <= c && c <= '9' { 569 return scanContinue 570 } 571 if c == 'e' || c == 'E' { 572 s.step = stateE 573 return scanContinue 574 } 575 s.endLiteral = true 576 return stateEndValue(s, c) 577 } 578 579 // stateE is the state after reading the mantissa and e in a number, 580 // such as after reading `314e` or `0.314e`. 581 func stateE(s *scanner, c byte) int { 582 if c == '+' || c == '-' { 583 s.step = stateESign 584 return scanContinue 585 } 586 return stateESign(s, c) 587 } 588 589 // stateESign is the state after reading the mantissa, e, and sign in a number, 590 // such as after reading `314e-` or `0.314e+`. 591 func stateESign(s *scanner, c byte) int { 592 if '0' <= c && c <= '9' { 593 s.step = stateE0 594 return scanContinue 595 } 596 return s.error(c, "in exponent of numeric literal") 597 } 598 599 // stateE0 is the state after reading the mantissa, e, optional sign, 600 // and at least one digit of the exponent in a number, 601 // such as after reading `314e-2` or `0.314e+1` or `3.14e0`. 602 func stateE0(s *scanner, c byte) int { 603 if '0' <= c && c <= '9' { 604 return scanContinue 605 } 606 s.endLiteral = true 607 return stateEndValue(s, c) 608 } 609 610 // stateT is the state after reading `t`. 611 func stateT(s *scanner, c byte) int { 612 if c == 'r' { 613 s.step = stateTr 614 return scanContinue 615 } 616 return s.error(c, "in literal true (expecting 'r')") 617 } 618 619 // stateTr is the state after reading `tr`. 620 func stateTr(s *scanner, c byte) int { 621 if c == 'u' { 622 s.step = stateTru 623 return scanContinue 624 } 625 return s.error(c, "in literal true (expecting 'u')") 626 } 627 628 // stateTru is the state after reading `tru`. 629 func stateTru(s *scanner, c byte) int { 630 if c == 'e' { 631 s.step = stateEndValue 632 s.endLiteral = true 633 return scanContinue 634 } 635 return s.error(c, "in literal true (expecting 'e')") 636 } 637 638 // stateF is the state after reading `f`. 639 func stateF(s *scanner, c byte) int { 640 if c == 'a' { 641 s.step = stateFa 642 return scanContinue 643 } 644 return s.error(c, "in literal false (expecting 'a')") 645 } 646 647 // stateFa is the state after reading `fa`. 648 func stateFa(s *scanner, c byte) int { 649 if c == 'l' { 650 s.step = stateFal 651 return scanContinue 652 } 653 return s.error(c, "in literal false (expecting 'l')") 654 } 655 656 // stateFal is the state after reading `fal`. 657 func stateFal(s *scanner, c byte) int { 658 if c == 's' { 659 s.step = stateFals 660 return scanContinue 661 } 662 return s.error(c, "in literal false (expecting 's')") 663 } 664 665 // stateFals is the state after reading `fals`. 666 func stateFals(s *scanner, c byte) int { 667 if c == 'e' { 668 s.step = stateEndValue 669 s.endLiteral = true 670 return scanContinue 671 } 672 return s.error(c, "in literal false (expecting 'e')") 673 } 674 675 // stateN is the state after reading `n`. 676 func stateN(s *scanner, c byte) int { 677 if c == 'u' { 678 s.step = stateNu 679 return scanContinue 680 } 681 return s.error(c, "in literal null (expecting 'u')") 682 } 683 684 // stateNu is the state after reading `nu`. 685 func stateNu(s *scanner, c byte) int { 686 if c == 'l' { 687 s.step = stateNul 688 return scanContinue 689 } 690 return s.error(c, "in literal null (expecting 'l')") 691 } 692 693 // stateNul is the state after reading `nul`. 694 func stateNul(s *scanner, c byte) int { 695 if c == 'l' { 696 s.step = stateEndValue 697 s.endLiteral = true 698 return scanContinue 699 } 700 return s.error(c, "in literal null (expecting 'l')") 701 } 702 703 // stateError is the state after reaching a syntax error, 704 // such as after reading `[1}` or `5.1.2`. 705 func stateError(s *scanner, c byte) int { 706 return scanError 707 } 708 709 // error records an error and switches to the error state. 710 func (s *scanner) error(c byte, context string) int { 711 s.step = stateError 712 s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} 713 return scanError 714 } 715 716 // quoteChar formats c as a quoted character literal 717 func quoteChar(c byte) string { 718 // special cases - different from quoted strings 719 if c == '\'' { 720 return `'\''` 721 } 722 if c == '"' { 723 return `'"'` 724 } 725 726 // use quoted string with different quotation marks 727 s := strconv.Quote(string(c)) 728 return "'" + s[1:len(s)-1] + "'" 729 } 730 731 func (sb *streamByte) error(s *scanner, context string) int { 732 s.err = &SyntaxError{"invalid character " + quoteChar(sb.Peek()) + " " + context, int64(sb.pos + 1)} 733 return scanError 734 } 735 736 func (s *scanner) parseSimpleLiteral(sb *streamByte, length int) int { 737 if len(*sb.data) < sb.pos+length { 738 s.err = &SyntaxError{"unexpected end of JSON input", int64(len(*sb.data))} 739 return scanError 740 } 741 s.pushRecord(scanBeginLiteral, sb.pos) 742 sb.Take() 743 s.bytes = int64(sb.pos) 744 for i := 0; i < length-1; i++ { 745 s.bytes++ 746 op := s.step(s, sb.Take()) 747 if op == scanError { 748 return op 749 } 750 } 751 s.pushRecord(scanEndLiteral, sb.pos-1) 752 return scanContinue 753 } 754 755 func (s *scanner) parseValue(sb *streamByte) int { 756 sb.skipSpaces() 757 topValue := s.endTop 758 if len(*sb.data) <= sb.pos { 759 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 760 return scanError 761 } 762 cur := sb.Peek() 763 s.endTop = false 764 op := scanContinue 765 switch cur { 766 case '"': 767 op = s.parseString(sb) 768 case '{': 769 op = s.parseObject(sb) 770 case '[': 771 op = s.parseArray(sb) 772 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 773 op = s.parseNumber(sb) 774 case 't': 775 s.step = stateT 776 op = s.parseSimpleLiteral(sb, 4) 777 778 case 'f': 779 s.step = stateF 780 op = s.parseSimpleLiteral(sb, 5) 781 case 'n': 782 s.step = stateN 783 op = s.parseSimpleLiteral(sb, 4) 784 default: 785 return sb.error(s, "looking for beginning of value") 786 } 787 788 if topValue && op != scanError { 789 sb.skipSpaces() 790 if !sb.isEnd() { 791 return sb.error(s, "after top-level value") 792 } 793 } 794 795 return op 796 } 797 798 func (s *scanner) parseString(sb *streamByte) int { 799 s.pushRecord(scanBeginLiteral, sb.pos) 800 sb.pos++ //skip " 801 quotePos := bytes.IndexByte((*sb.data)[sb.pos:], '"') 802 if quotePos < 0 { 803 s.err = &SyntaxError{"unexpected end of JSON input", int64(len(*sb.data))} 804 return scanError 805 806 } 807 808 // in case without escape symbol \". Errors inside string will be handled during object filling, with function unquote 809 // it's done in sake of speed 810 sb.pos += quotePos - 1 811 for sb.Peek() == '\\' { //pos on the symbol before " 812 //it may escape symbol " 813 sb.pos-- 814 sum := 1 815 816 //checking multiple symbols \, kind of "...\\"..." 817 for sb.Peek() == '\\' { 818 sum++ 819 sb.pos-- 820 } 821 if sum%2 == 0 { //even number of \, last of them doesn't escape "; it means that current qoute pos is end of string 822 sb.pos += sum 823 break 824 } 825 //otherwise odd number of \ escapes ". Looking for the next " 826 sb.pos += sum + 1 // pos on " 827 n := bytes.IndexByte((*sb.data)[sb.pos+1:], '"') 828 if n < 0 { 829 s.err = &SyntaxError{"unexpected end of JSON input", int64(len(*sb.data))} 830 return scanError 831 } 832 sb.pos += n 833 } 834 //here pos is on the symbol before " 835 sb.pos += 2 836 s.pushRecord(scanEndLiteral, sb.pos-1) 837 return scanEndLiteral 838 } 839 840 func (s *scanner) parseNumber(sb *streamByte) int { 841 s.pushRecord(scanBeginLiteral, sb.pos) 842 cur := sb.Take() 843 if cur == '-' { 844 if sb.isEnd() { 845 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 846 return scanError 847 } 848 cur = sb.Take() 849 } 850 if sb.isEnd() { 851 if '0' <= cur && cur <= '9' { 852 s.pushRecord(scanEndLiteral, sb.pos-1) 853 return scanEndLiteral 854 } else { 855 sb.pos-- 856 return sb.error(s, "in numeric literal") 857 } 858 } 859 if !sb.isEnd() && '1' <= cur && cur <= '9' { 860 sb.parseFigures() 861 } else { 862 if cur != '0' { 863 sb.pos-- 864 return sb.error(s, "in numeric literal") 865 } 866 } 867 cur = sb.Take() //pos on the next after cur 868 if cur == '.' { 869 if op := sb.Peek(); op > '9' || op < '0' { 870 if sb.isEnd() { 871 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 872 return scanError 873 } 874 return sb.error(s, "after decimal point in numeric literal") 875 } 876 sb.parseFigures() 877 cur = sb.Take() 878 } 879 if cur == 'e' || cur == 'E' { 880 op := sb.Peek() 881 if op != '+' && op != '-' && (op < '0' || op > '9') { 882 if sb.isEnd() { 883 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 884 return scanError 885 } 886 return sb.error(s, "in exponent of numeric literal") 887 } 888 op = sb.Take() 889 if op == '-' || op == '+' { 890 op = sb.Peek() 891 if op < '0' || op > '9' { 892 if sb.isEnd() { 893 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 894 return scanError 895 } 896 return sb.error(s, "in exponent of numeric literal") 897 } 898 } 899 900 sb.parseFigures() 901 902 } else { //pos on the second after unknown symbol. like 123ua. pos now at a 903 sb.pos-- 904 } 905 s.pushRecord(scanEndLiteral, sb.pos-1) 906 return scanEndLiteral 907 } 908 909 func (sb *streamByte) parseFigures() { 910 c := sb.Take() 911 912 for '0' <= c && c <= '9' { 913 c = sb.Take() 914 } 915 sb.pos-- 916 } 917 918 func (s *scanner) parseObject(sb *streamByte) int { 919 s.pushRecord(scanBeginObject, sb.pos) 920 sb.pos++ // skip { 921 sb.skipSpaces() 922 cur := sb.Peek() 923 if sb.isEnd() { 924 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 925 return scanError 926 } 927 928 if cur != '"' && cur != '}' { 929 return sb.error(s, "looking for beginning of object key string") 930 } 931 for !sb.isEnd() { 932 sb.skipSpaces() 933 934 switch cur { 935 case '}': 936 s.pushRecord(scanEndObject, sb.pos) 937 sb.pos++ 938 return scanEndObject 939 case '"': 940 op := s.parseString(sb) 941 if op == scanError { 942 return op 943 } 944 sb.skipSpaces() 945 if sb.isEnd() { 946 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 947 return scanError 948 } 949 cur = sb.Peek() 950 951 if cur == ':' { 952 sb.pos++ 953 } else { 954 return sb.error(s, "after object key") 955 } 956 op = s.parseValue(sb) 957 if op == scanError { 958 return op 959 } 960 sb.skipSpaces() 961 if sb.isEnd() { 962 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 963 return scanError 964 } 965 cur = sb.Peek() 966 if cur == ',' { 967 sb.pos++ 968 if sb.isEnd() { 969 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 970 return scanError 971 } 972 sb.skipSpaces() 973 cur = sb.Peek() 974 } 975 default: 976 return sb.error(s, "after object key:value pair") 977 } 978 } 979 980 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 981 return scanError 982 } 983 984 func (s *scanner) parseArray(sb *streamByte) int { 985 s.pushRecord(scanBeginArray, sb.pos) 986 sb.pos++ 987 sb.skipSpaces() 988 if sb.isEnd() { 989 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 990 return scanError 991 } 992 cur := sb.Peek() 993 if cur == ']' { 994 s.pushRecord(scanEndArray, sb.pos) 995 sb.pos++ 996 return scanEndArray 997 } 998 op := s.parseValue(sb) 999 if op == scanError { 1000 return op 1001 } 1002 sb.skipSpaces() 1003 1004 cur = sb.Peek() 1005 for !sb.isEnd() { 1006 switch cur { 1007 case ']': 1008 s.pushRecord(scanEndArray, sb.pos) 1009 sb.pos++ 1010 return scanEndArray 1011 case ',': 1012 sb.pos++ 1013 sb.skipSpaces() 1014 default: 1015 return sb.error(s, "after array element") 1016 } 1017 1018 op = s.parseValue(sb) 1019 if op == scanError { 1020 return op 1021 } 1022 1023 sb.skipSpaces() 1024 cur = sb.Peek() 1025 } 1026 1027 //here is incomplete array 1028 s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)} 1029 return scanError 1030 1031 }