github.com/aergoio/aergo@v1.3.1/cmd/aergocli/util/encoding/json/scanner.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 // JSON value parser state machine. 8 // Just about at the limit of what is reasonable to write by hand. 9 // Some parts are a bit tedious, but overall it nicely factors out the 10 // otherwise common code from the multiple scanning functions 11 // in this package (Compact, Indent, checkValid, etc). 12 // 13 // This file starts with two simple examples using the scanner 14 // before diving into the scanner itself. 15 16 import "strconv" 17 18 // Valid reports whether data is a valid JSON encoding. 19 func Valid(data []byte) bool { 20 return checkValid(data, &scanner{}) == nil 21 } 22 23 // checkValid verifies that data is valid JSON-encoded data. 24 // scan is passed in for use by checkValid to avoid an allocation. 25 func checkValid(data []byte, scan *scanner) error { 26 scan.reset() 27 for _, c := range data { 28 scan.bytes++ 29 if scan.step(scan, c) == scanError { 30 return scan.err 31 } 32 } 33 if scan.eof() == scanError { 34 return scan.err 35 } 36 return nil 37 } 38 39 // A SyntaxError is a description of a JSON syntax error. 40 type SyntaxError struct { 41 msg string // description of error 42 Offset int64 // error occurred after reading Offset bytes 43 } 44 45 func (e *SyntaxError) Error() string { return e.msg } 46 47 // A scanner is a JSON scanning state machine. 48 // Callers call scan.reset() and then pass bytes in one at a time 49 // by calling scan.step(&scan, c) for each byte. 50 // The return value, referred to as an opcode, tells the 51 // caller about significant parsing events like beginning 52 // and ending literals, objects, and arrays, so that the 53 // caller can follow along if it wishes. 54 // The return value scanEnd indicates that a single top-level 55 // JSON value has been completed, *before* the byte that 56 // just got passed in. (The indication must be delayed in order 57 // to recognize the end of numbers: is 123 a whole value or 58 // the beginning of 12345e+6?). 59 type scanner struct { 60 // The step is a func to be called to execute the next transition. 61 // Also tried using an integer constant and a single func 62 // with a switch, but using the func directly was 10% faster 63 // on a 64-bit Mac Mini, and it's nicer to read. 64 step func(*scanner, byte) int 65 66 // Reached end of top-level value. 67 endTop bool 68 69 // Stack of what we're in the middle of - array values, object keys, object values. 70 parseState []int 71 72 // Error that happened, if any. 73 err error 74 75 // total bytes consumed, updated by decoder.Decode 76 bytes int64 77 } 78 79 // These values are returned by the state transition functions 80 // assigned to scanner.state and the method scanner.eof. 81 // They give details about the current state of the scan that 82 // callers might be interested to know about. 83 // It is okay to ignore the return value of any particular 84 // call to scanner.state: if one call returns scanError, 85 // every subsequent call will return scanError too. 86 const ( 87 // Continue. 88 scanContinue = iota // uninteresting byte 89 scanBeginLiteral // end implied by next result != scanContinue 90 scanBeginObject // begin object 91 scanObjectKey // just finished object key (string) 92 scanObjectValue // just finished non-last object value 93 scanEndObject // end object (implies scanObjectValue if possible) 94 scanBeginArray // begin array 95 scanArrayValue // just finished array value 96 scanEndArray // end array (implies scanArrayValue if possible) 97 scanSkipSpace // space byte; can skip; known to be last "continue" result 98 99 // Stop. 100 scanEnd // top-level value ended *before* this byte; known to be first "stop" result 101 scanError // hit an error, scanner.err. 102 ) 103 104 // These values are stored in the parseState stack. 105 // They give the current state of a composite value 106 // being scanned. If the parser is inside a nested value 107 // the parseState describes the nested state, outermost at entry 0. 108 const ( 109 parseObjectKey = iota // parsing object key (before colon) 110 parseObjectValue // parsing object value (after colon) 111 parseArrayValue // parsing array value 112 ) 113 114 // reset prepares the scanner for use. 115 // It must be called before calling s.step. 116 func (s *scanner) reset() { 117 s.step = stateBeginValue 118 s.parseState = s.parseState[0:0] 119 s.err = nil 120 s.endTop = false 121 } 122 123 // eof tells the scanner that the end of input has been reached. 124 // It returns a scan status just as s.step does. 125 func (s *scanner) eof() int { 126 if s.err != nil { 127 return scanError 128 } 129 if s.endTop { 130 return scanEnd 131 } 132 s.step(s, ' ') 133 if s.endTop { 134 return scanEnd 135 } 136 if s.err == nil { 137 s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} 138 } 139 return scanError 140 } 141 142 // pushParseState pushes a new parse state p onto the parse stack. 143 func (s *scanner) pushParseState(p int) { 144 s.parseState = append(s.parseState, p) 145 } 146 147 // popParseState pops a parse state (already obtained) off the stack 148 // and updates s.step accordingly. 149 func (s *scanner) popParseState() { 150 n := len(s.parseState) - 1 151 s.parseState = s.parseState[0:n] 152 if n == 0 { 153 s.step = stateEndTop 154 s.endTop = true 155 } else { 156 s.step = stateEndValue 157 } 158 } 159 160 func isSpace(c byte) bool { 161 return c == ' ' || c == '\t' || c == '\r' || c == '\n' 162 } 163 164 // stateBeginValueOrEmpty is the state after reading `[`. 165 func stateBeginValueOrEmpty(s *scanner, c byte) int { 166 if c <= ' ' && isSpace(c) { 167 return scanSkipSpace 168 } 169 if c == ']' { 170 return stateEndValue(s, c) 171 } 172 return stateBeginValue(s, c) 173 } 174 175 // stateBeginValue is the state at the beginning of the input. 176 func stateBeginValue(s *scanner, c byte) int { 177 if c <= ' ' && isSpace(c) { 178 return scanSkipSpace 179 } 180 switch c { 181 case '{': 182 s.step = stateBeginStringOrEmpty 183 s.pushParseState(parseObjectKey) 184 return scanBeginObject 185 case '[': 186 s.step = stateBeginValueOrEmpty 187 s.pushParseState(parseArrayValue) 188 return scanBeginArray 189 case '"': 190 s.step = stateInString 191 return scanBeginLiteral 192 case '-': 193 s.step = stateNeg 194 return scanBeginLiteral 195 case '0': // beginning of 0.123 196 s.step = state0 197 return scanBeginLiteral 198 case 't': // beginning of true 199 s.step = stateT 200 return scanBeginLiteral 201 case 'f': // beginning of false 202 s.step = stateF 203 return scanBeginLiteral 204 case 'n': // beginning of null 205 s.step = stateN 206 return scanBeginLiteral 207 } 208 if '1' <= c && c <= '9' { // beginning of 1234.5 209 s.step = state1 210 return scanBeginLiteral 211 } 212 return s.error(c, "looking for beginning of value") 213 } 214 215 // stateBeginStringOrEmpty is the state after reading `{`. 216 func stateBeginStringOrEmpty(s *scanner, c byte) int { 217 if c <= ' ' && isSpace(c) { 218 return scanSkipSpace 219 } 220 if c == '}' { 221 n := len(s.parseState) 222 s.parseState[n-1] = parseObjectValue 223 return stateEndValue(s, c) 224 } 225 return stateBeginString(s, c) 226 } 227 228 // stateBeginString is the state after reading `{"key": value,`. 229 func stateBeginString(s *scanner, c byte) int { 230 if c <= ' ' && isSpace(c) { 231 return scanSkipSpace 232 } 233 if c == '"' { 234 s.step = stateInString 235 return scanBeginLiteral 236 } 237 return s.error(c, "looking for beginning of object key string") 238 } 239 240 // stateEndValue is the state after completing a value, 241 // such as after reading `{}` or `true` or `["x"`. 242 func stateEndValue(s *scanner, c byte) int { 243 n := len(s.parseState) 244 if n == 0 { 245 // Completed top-level before the current byte. 246 s.step = stateEndTop 247 s.endTop = true 248 return stateEndTop(s, c) 249 } 250 if c <= ' ' && isSpace(c) { 251 s.step = stateEndValue 252 return scanSkipSpace 253 } 254 ps := s.parseState[n-1] 255 switch ps { 256 case parseObjectKey: 257 if c == ':' { 258 s.parseState[n-1] = parseObjectValue 259 s.step = stateBeginValue 260 return scanObjectKey 261 } 262 return s.error(c, "after object key") 263 case parseObjectValue: 264 if c == ',' { 265 s.parseState[n-1] = parseObjectKey 266 s.step = stateBeginString 267 return scanObjectValue 268 } 269 if c == '}' { 270 s.popParseState() 271 return scanEndObject 272 } 273 return s.error(c, "after object key:value pair") 274 case parseArrayValue: 275 if c == ',' { 276 s.step = stateBeginValue 277 return scanArrayValue 278 } 279 if c == ']' { 280 s.popParseState() 281 return scanEndArray 282 } 283 return s.error(c, "after array element") 284 } 285 return s.error(c, "") 286 } 287 288 // stateEndTop is the state after finishing the top-level value, 289 // such as after reading `{}` or `[1,2,3]`. 290 // Only space characters should be seen now. 291 func stateEndTop(s *scanner, c byte) int { 292 if c != ' ' && c != '\t' && c != '\r' && c != '\n' { 293 // Complain about non-space byte on next call. 294 s.error(c, "after top-level value") 295 } 296 return scanEnd 297 } 298 299 // stateInString is the state after reading `"`. 300 func stateInString(s *scanner, c byte) int { 301 if c == '"' { 302 s.step = stateEndValue 303 return scanContinue 304 } 305 if c == '\\' { 306 s.step = stateInStringEsc 307 return scanContinue 308 } 309 if c < 0x20 { 310 return s.error(c, "in string literal") 311 } 312 return scanContinue 313 } 314 315 // stateInStringEsc is the state after reading `"\` during a quoted string. 316 func stateInStringEsc(s *scanner, c byte) int { 317 switch c { 318 case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': 319 s.step = stateInString 320 return scanContinue 321 case 'u': 322 s.step = stateInStringEscU 323 return scanContinue 324 } 325 return s.error(c, "in string escape code") 326 } 327 328 // stateInStringEscU is the state after reading `"\u` during a quoted string. 329 func stateInStringEscU(s *scanner, c byte) int { 330 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 331 s.step = stateInStringEscU1 332 return scanContinue 333 } 334 // numbers 335 return s.error(c, "in \\u hexadecimal character escape") 336 } 337 338 // stateInStringEscU1 is the state after reading `"\u1` during a quoted string. 339 func stateInStringEscU1(s *scanner, c byte) int { 340 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 341 s.step = stateInStringEscU12 342 return scanContinue 343 } 344 // numbers 345 return s.error(c, "in \\u hexadecimal character escape") 346 } 347 348 // stateInStringEscU12 is the state after reading `"\u12` during a quoted string. 349 func stateInStringEscU12(s *scanner, c byte) int { 350 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 351 s.step = stateInStringEscU123 352 return scanContinue 353 } 354 // numbers 355 return s.error(c, "in \\u hexadecimal character escape") 356 } 357 358 // stateInStringEscU123 is the state after reading `"\u123` during a quoted string. 359 func stateInStringEscU123(s *scanner, c byte) int { 360 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 361 s.step = stateInString 362 return scanContinue 363 } 364 // numbers 365 return s.error(c, "in \\u hexadecimal character escape") 366 } 367 368 // stateNeg is the state after reading `-` during a number. 369 func stateNeg(s *scanner, c byte) int { 370 if c == '0' { 371 s.step = state0 372 return scanContinue 373 } 374 if '1' <= c && c <= '9' { 375 s.step = state1 376 return scanContinue 377 } 378 return s.error(c, "in numeric literal") 379 } 380 381 // state1 is the state after reading a non-zero integer during a number, 382 // such as after reading `1` or `100` but not `0`. 383 func state1(s *scanner, c byte) int { 384 if '0' <= c && c <= '9' { 385 s.step = state1 386 return scanContinue 387 } 388 return state0(s, c) 389 } 390 391 // state0 is the state after reading `0` during a number. 392 func state0(s *scanner, c byte) int { 393 if c == '.' { 394 s.step = stateDot 395 return scanContinue 396 } 397 if c == 'e' || c == 'E' { 398 s.step = stateE 399 return scanContinue 400 } 401 return stateEndValue(s, c) 402 } 403 404 // stateDot is the state after reading the integer and decimal point in a number, 405 // such as after reading `1.`. 406 func stateDot(s *scanner, c byte) int { 407 if '0' <= c && c <= '9' { 408 s.step = stateDot0 409 return scanContinue 410 } 411 return s.error(c, "after decimal point in numeric literal") 412 } 413 414 // stateDot0 is the state after reading the integer, decimal point, and subsequent 415 // digits of a number, such as after reading `3.14`. 416 func stateDot0(s *scanner, c byte) int { 417 if '0' <= c && c <= '9' { 418 return scanContinue 419 } 420 if c == 'e' || c == 'E' { 421 s.step = stateE 422 return scanContinue 423 } 424 return stateEndValue(s, c) 425 } 426 427 // stateE is the state after reading the mantissa and e in a number, 428 // such as after reading `314e` or `0.314e`. 429 func stateE(s *scanner, c byte) int { 430 if c == '+' || c == '-' { 431 s.step = stateESign 432 return scanContinue 433 } 434 return stateESign(s, c) 435 } 436 437 // stateESign is the state after reading the mantissa, e, and sign in a number, 438 // such as after reading `314e-` or `0.314e+`. 439 func stateESign(s *scanner, c byte) int { 440 if '0' <= c && c <= '9' { 441 s.step = stateE0 442 return scanContinue 443 } 444 return s.error(c, "in exponent of numeric literal") 445 } 446 447 // stateE0 is the state after reading the mantissa, e, optional sign, 448 // and at least one digit of the exponent in a number, 449 // such as after reading `314e-2` or `0.314e+1` or `3.14e0`. 450 func stateE0(s *scanner, c byte) int { 451 if '0' <= c && c <= '9' { 452 return scanContinue 453 } 454 return stateEndValue(s, c) 455 } 456 457 // stateT is the state after reading `t`. 458 func stateT(s *scanner, c byte) int { 459 if c == 'r' { 460 s.step = stateTr 461 return scanContinue 462 } 463 return s.error(c, "in literal true (expecting 'r')") 464 } 465 466 // stateTr is the state after reading `tr`. 467 func stateTr(s *scanner, c byte) int { 468 if c == 'u' { 469 s.step = stateTru 470 return scanContinue 471 } 472 return s.error(c, "in literal true (expecting 'u')") 473 } 474 475 // stateTru is the state after reading `tru`. 476 func stateTru(s *scanner, c byte) int { 477 if c == 'e' { 478 s.step = stateEndValue 479 return scanContinue 480 } 481 return s.error(c, "in literal true (expecting 'e')") 482 } 483 484 // stateF is the state after reading `f`. 485 func stateF(s *scanner, c byte) int { 486 if c == 'a' { 487 s.step = stateFa 488 return scanContinue 489 } 490 return s.error(c, "in literal false (expecting 'a')") 491 } 492 493 // stateFa is the state after reading `fa`. 494 func stateFa(s *scanner, c byte) int { 495 if c == 'l' { 496 s.step = stateFal 497 return scanContinue 498 } 499 return s.error(c, "in literal false (expecting 'l')") 500 } 501 502 // stateFal is the state after reading `fal`. 503 func stateFal(s *scanner, c byte) int { 504 if c == 's' { 505 s.step = stateFals 506 return scanContinue 507 } 508 return s.error(c, "in literal false (expecting 's')") 509 } 510 511 // stateFals is the state after reading `fals`. 512 func stateFals(s *scanner, c byte) int { 513 if c == 'e' { 514 s.step = stateEndValue 515 return scanContinue 516 } 517 return s.error(c, "in literal false (expecting 'e')") 518 } 519 520 // stateN is the state after reading `n`. 521 func stateN(s *scanner, c byte) int { 522 if c == 'u' { 523 s.step = stateNu 524 return scanContinue 525 } 526 return s.error(c, "in literal null (expecting 'u')") 527 } 528 529 // stateNu is the state after reading `nu`. 530 func stateNu(s *scanner, c byte) int { 531 if c == 'l' { 532 s.step = stateNul 533 return scanContinue 534 } 535 return s.error(c, "in literal null (expecting 'l')") 536 } 537 538 // stateNul is the state after reading `nul`. 539 func stateNul(s *scanner, c byte) int { 540 if c == 'l' { 541 s.step = stateEndValue 542 return scanContinue 543 } 544 return s.error(c, "in literal null (expecting 'l')") 545 } 546 547 // stateError is the state after reaching a syntax error, 548 // such as after reading `[1}` or `5.1.2`. 549 func stateError(s *scanner, c byte) int { 550 return scanError 551 } 552 553 // error records an error and switches to the error state. 554 func (s *scanner) error(c byte, context string) int { 555 s.step = stateError 556 s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} 557 return scanError 558 } 559 560 // quoteChar formats c as a quoted character literal 561 func quoteChar(c byte) string { 562 // special cases - different from quoted strings 563 if c == '\'' { 564 return `'\''` 565 } 566 if c == '"' { 567 return `'"'` 568 } 569 570 // use quoted string with different quotation marks 571 s := strconv.Quote(string(c)) 572 return "'" + s[1:len(s)-1] + "'" 573 }