get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/conf/lex.go

// Copyright 2013-2018 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Customized heavily from
// https://github.com/BurntSushi/toml/blob/master/lex.go, which is based on
// Rob Pike's talk: http://cuddle.googlecode.com/hg/talk/lex.html

// The supported format is less restrictive than most common configuration formats.
// It supports mixed arrays [], nested maps {}, and multiple comment types (# and //).
// Key/value assignments may use '=', ':', or plain whitespace,
//   e.g. foo = 2, foo : 2, foo 2
// Maps can be assigned with no key separator as well, and semicolons as value
// terminators in key/value assignments are optional.
//
// See lex_test.go for more examples.

package conf

import (
	"encoding/hex"
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

type itemType int

const (
	itemError itemType = iota
	itemNIL   // used in the parser to indicate no type
	itemEOF
	itemKey
	itemText
	itemString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArrayStart
	itemArrayEnd
	itemMapStart
	itemMapEnd
	itemCommentStart
	itemVariable
	itemInclude
)

const (
	eof               = 0
	mapStart          = '{'
	mapEnd            = '}'
	keySepEqual       = '='
	keySepColon       = ':'
	arrayStart        = '['
	arrayEnd          = ']'
	arrayValTerm      = ','
	mapValTerm        = ','
	commentHashStart  = '#'
	commentSlashStart = '/'
	dqStringStart     = '"'
	dqStringEnd       = '"'
	sqStringStart     = '\''
	sqStringEnd       = '\''
	optValTerm        = ';'
	topOptStart       = '{'
	topOptValTerm     = ','
	topOptTerm        = '}'
	blockStart        = '('
	blockEnd          = ')'
	mapEndString      = string(mapEnd)
)

type stateFn func(lx *lexer) stateFn
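// exampleConf is an illustrative sample (not used by the lexer itself) of the
// syntax this lexer accepts: '#' and '//' comments, '=', ':' or plain
// whitespace as key separators, nested maps, and mixed arrays. The key names
// are arbitrary; see lex_test.go for the authoritative examples.
const exampleConf = `
# hash comment
// slash comment
foo = 2
bar: "baz"
debug false
limits {
  max_payload: 1MB
  timeout 2.5
}
tags = ["a", 1, true]
`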
type lexer struct {
	input string
	start int
	pos   int
	width int
	line  int
	state stateFn
	items chan item

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn

	// Used for processing escapable substrings in double-quoted and raw strings
	stringParts   []string
	stringStateFn stateFn

	// lstart is the start position of the current line.
	lstart int

	// ilstart is the start position of the line from the current item.
	ilstart int
}

type item struct {
	typ  itemType
	val  string
	line int
	pos  int
}

func (lx *lexer) nextItem() item {
	for {
		select {
		case item := <-lx.items:
			return item
		default:
			lx.state = lx.state(lx)
		}
	}
}

func lex(input string) *lexer {
	lx := &lexer{
		input:       input,
		state:       lexTop,
		line:        1,
		items:       make(chan item, 10),
		stack:       make([]stateFn, 0, 10),
		stringParts: []string{},
	}
	return lx
}

func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}

func (lx *lexer) pop() stateFn {
	if len(lx.stack) == 0 {
		return lx.errorf("BUG in lexer: no states to pop.")
	}
	li := len(lx.stack) - 1
	last := lx.stack[li]
	lx.stack = lx.stack[0:li]
	return last
}

func (lx *lexer) emit(typ itemType) {
	val := strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos]

	// Position of item in line where it started.
	pos := lx.pos - lx.ilstart - len(val)
	lx.items <- item{typ, val, lx.line, pos}
	lx.start = lx.pos
	lx.ilstart = lx.lstart
}

func (lx *lexer) emitString() {
	var finalString string
	if len(lx.stringParts) > 0 {
		finalString = strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos]
		lx.stringParts = []string{}
	} else {
		finalString = lx.input[lx.start:lx.pos]
	}
	// Position of string in line where it started.
	pos := lx.pos - lx.ilstart - len(finalString)
	lx.items <- item{itemString, finalString, lx.line, pos}
	lx.start = lx.pos
	lx.ilstart = lx.lstart
}

func (lx *lexer) addCurrentStringPart(offset int) {
	lx.stringParts = append(lx.stringParts, lx.input[lx.start:lx.pos-offset])
	lx.start = lx.pos
}

func (lx *lexer) addStringPart(s string) stateFn {
	lx.stringParts = append(lx.stringParts, s)
	lx.start = lx.pos
	return lx.stringStateFn
}

func (lx *lexer) hasEscapedParts() bool {
	return len(lx.stringParts) > 0
}

func (lx *lexer) next() (r rune) {
	if lx.pos >= len(lx.input) {
		lx.width = 0
		return eof
	}

	if lx.input[lx.pos] == '\n' {
		lx.line++

		// Mark start position of current line.
		lx.lstart = lx.pos
	}
	r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
	lx.pos += lx.width

	return r
}

// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
	lx.start = lx.pos
	lx.ilstart = lx.lstart
}

// backup steps back one rune. Can be called only once per call of next.
func (lx *lexer) backup() {
	lx.pos -= lx.width
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}

// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}
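// collectItems is an illustrative helper (not used elsewhere in this package)
// sketching how a caller can drive the lexer: create it with lex() and call
// nextItem() until an EOF or error item is returned. The real consumer of
// these items is the parser in this package.
func collectItems(input string) []item {
	lx := lex(input)
	var items []item
	for {
		it := lx.nextItem()
		items = append(items, it)
		if it.typ == itemEOF || it.typ == itemError {
			return items
		}
	}
}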
// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
// character (new lines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
	for i, value := range values {
		if v, ok := value.(rune); ok {
			values[i] = escapeSpecial(v)
		}
	}

	// Position of error in current line.
	pos := lx.pos - lx.lstart
	lx.items <- item{
		itemError,
		fmt.Sprintf(format, values...),
		lx.line,
		pos,
	}
	return nil
}

// lexTop consumes elements at the top level of the data structure.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsSpace(r) {
		return lexSkip(lx, lexTop)
	}

	switch r {
	case topOptStart:
		return lexSkip(lx, lexTop)
	case commentHashStart:
		lx.push(lexTop)
		return lexCommentStart
	case commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}

	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopValueEnd)
	return lexKeyStart
}

// lexTopValueEnd is entered whenever a top-level value has been consumed.
// It must see only whitespace, and will turn back to lexTop upon a new line.
// If it sees EOF, it will quit the lexer successfully.
func lexTopValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentHashStart:
		// a comment will read to a new line for us.
		lx.push(lexTop)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case isWhitespace(r):
		return lexTopValueEnd
	case isNL(r) || r == eof || r == optValTerm || r == topOptValTerm || r == topOptTerm:
		lx.ignore()
		return lexTop
	}
	return lx.errorf("Expected a top-level value to end with a new line, "+
		"comment or EOF, but got '%v' instead.", r)
}

// lexKeyStart consumes a key name up until the first non-whitespace character.
// lexKeyStart will ignore whitespace. It will also eat enclosing quotes.
func lexKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case isKeySeparator(r):
		return lx.errorf("Unexpected key separator '%v'", r)
	case unicode.IsSpace(r):
		lx.next()
		return lexSkip(lx, lexKeyStart)
	case r == dqStringStart:
		lx.next()
		return lexSkip(lx, lexDubQuotedKey)
	case r == sqStringStart:
		lx.next()
		return lexSkip(lx, lexQuotedKey)
	}
	lx.ignore()
	lx.next()
	return lexKey
}

// lexDubQuotedKey consumes the text of a key between double quotes.
func lexDubQuotedKey(lx *lexer) stateFn {
	r := lx.peek()
	if r == dqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexKeyEnd)
	} else if r == eof {
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	lx.next()
	return lexDubQuotedKey
}

// lexQuotedKey consumes the text of a key between single quotes.
func lexQuotedKey(lx *lexer) stateFn {
	r := lx.peek()
	if r == sqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexKeyEnd)
	} else if r == eof {
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	lx.next()
	return lexQuotedKey
}
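// exampleQuotedKeys is an illustrative sample (not used by the lexer) showing
// that keys may be bare, single-quoted, or double-quoted; lexKeyStart above
// eats the enclosing quotes, so all three keys below are emitted as plain
// itemKey items.
const exampleQuotedKeys = `
plain_key = 1
'single quoted key' = 2
"double quoted key" = 3
`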
// keyCheckKeyword checks for reserved keywords as the key value when the key is
// separated with a space.
func (lx *lexer) keyCheckKeyword(fallThrough, push stateFn) stateFn {
	key := strings.ToLower(lx.input[lx.start:lx.pos])
	switch key {
	case "include":
		lx.ignore()
		if push != nil {
			lx.push(push)
		}
		return lexIncludeStart
	}
	lx.emit(itemKey)
	return fallThrough
}

// lexIncludeStart will consume the whitespace until the start of the value.
func lexIncludeStart(lx *lexer) stateFn {
	r := lx.next()
	if isWhitespace(r) {
		return lexSkip(lx, lexIncludeStart)
	}
	lx.backup()
	return lexInclude
}

// lexIncludeQuotedString consumes the inner contents of a single-quoted string.
// It assumes that the beginning '\'' has already been consumed and ignored.
// It will not interpret any internal contents.
func lexIncludeQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == sqStringEnd:
		lx.backup()
		lx.emit(itemInclude)
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		return lx.errorf("Unexpected EOF in quoted include")
	}
	return lexIncludeQuotedString
}

// lexIncludeDubQuotedString consumes the inner contents of a double-quoted string.
// It assumes that the beginning '"' has already been consumed and ignored.
// It will not interpret any internal contents.
func lexIncludeDubQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == dqStringEnd:
		lx.backup()
		lx.emit(itemInclude)
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		return lx.errorf("Unexpected EOF in double quoted include")
	}
	return lexIncludeDubQuotedString
}

// lexIncludeString consumes the inner contents of a raw (unquoted) include value.
func lexIncludeString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isNL(r) || r == eof || r == optValTerm || r == mapEnd || isWhitespace(r):
		lx.backup()
		lx.emit(itemInclude)
		return lx.pop()
	case r == sqStringEnd:
		lx.backup()
		lx.emit(itemInclude)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexIncludeString
}

// lexInclude will consume the include value.
func lexInclude(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == sqStringStart:
		lx.ignore() // ignore the '
		return lexIncludeQuotedString
	case r == dqStringStart:
		lx.ignore() // ignore the "
		return lexIncludeDubQuotedString
	case r == arrayStart:
		return lx.errorf("Expected include value but found start of an array")
	case r == mapStart:
		return lx.errorf("Expected include value but found start of a map")
	case r == blockStart:
		return lx.errorf("Expected include value but found start of a block")
	case unicode.IsDigit(r), r == '-':
		return lx.errorf("Expected include value but found start of a number")
	case r == '\\':
		return lx.errorf("Expected include value but found escape sequence")
	case isNL(r):
		return lx.errorf("Expected include value but found new line")
	}
	lx.backup()
	return lexIncludeString
}
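// exampleInclude is an illustrative sample (not used by the lexer) of the
// "include" keyword handled by keyCheckKeyword and the lexInclude* states
// above. The path may be bare, single-quoted, or double-quoted and is emitted
// as an itemInclude item; the file names here are made up for illustration.
const exampleInclude = `
include ./nested.conf
include 'users.conf'
include "accounts/ops.conf"
`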
// lexKey consumes the text of a key. Assumes that the first character (which
// is not whitespace) has already been consumed.
func lexKey(lx *lexer) stateFn {
	r := lx.peek()
	if unicode.IsSpace(r) {
		// Spaces signal we could be looking at a keyword, e.g. include.
		// Keywords will eat the keyword and set the appropriate return stateFn.
		return lx.keyCheckKeyword(lexKeyEnd, nil)
	} else if isKeySeparator(r) || r == eof {
		lx.emit(itemKey)
		return lexKeyEnd
	}
	lx.next()
	return lexKey
}

// lexKeyEnd consumes the end of a key (up to the key separator).
// Assumes that the first whitespace character after a key (or the '=' or ':'
// separator) has NOT been consumed.
func lexKeyEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexKeyEnd)
	case isKeySeparator(r):
		return lexSkip(lx, lexValue)
	case r == eof:
		lx.emit(itemEOF)
		return nil
	}
	// We start the value here.
	lx.backup()
	return lexValue
}

// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT new lines.
	// In array syntax, the array states are responsible for ignoring new lines.
	r := lx.next()
	if isWhitespace(r) {
		return lexSkip(lx, lexValue)
	}

	switch {
	case r == arrayStart:
		lx.ignore()
		lx.emit(itemArrayStart)
		return lexArrayValue
	case r == mapStart:
		lx.ignore()
		lx.emit(itemMapStart)
		return lexMapKeyStart
	case r == sqStringStart:
		lx.ignore() // ignore the '
		return lexQuotedString
	case r == dqStringStart:
		lx.ignore() // ignore the "
		lx.stringStateFn = lexDubQuotedString
		return lexDubQuotedString
	case r == '-':
		return lexNegNumberStart
	case r == blockStart:
		lx.ignore()
		return lexBlock
	case unicode.IsDigit(r):
		lx.backup() // avoid an extra state and use the same as above
		return lexNumberOrDateOrStringOrIPStart
	case r == '.': // special error case, be kind to users
		return lx.errorf("Floats must start with a digit")
	case isNL(r):
		return lx.errorf("Expected value but found new line")
	}
	lx.backup()
	lx.stringStateFn = lexString
	return lexString
}

// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and new lines are ignored.
func lexArrayValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexArrayValue)
	case r == commentHashStart:
		lx.push(lexArrayValue)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValue)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == arrayValTerm:
		return lx.errorf("Unexpected array value terminator '%v'.", arrayValTerm)
	case r == arrayEnd:
		return lexArrayEnd
	}

	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}
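// exampleArrays is an illustrative sample (not used by the lexer) of array
// lexing: values may be separated by commas or new lines, comments may appear
// between elements, and element types may be mixed (see lexArrayValue above
// and lexArrayValueEnd below). The host names are made up for illustration.
const exampleArrays = `
servers = [
  "a.example.com:4222" // slash comment between elements
  "b.example.com:4222",
  4222                 # hash comment between elements
]
`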
// lexArrayValueEnd consumes the cruft between values of an array. Namely,
// it ignores whitespace and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexArrayValueEnd)
	case r == commentHashStart:
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValueEnd)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == arrayValTerm || isNL(r):
		return lexSkip(lx, lexArrayValue) // Move onto next
	case r == arrayEnd:
		return lexArrayEnd
	}
	return lx.errorf("Expected an array value terminator %q or an array "+
		"terminator %q, but got '%v' instead.", arrayValTerm, arrayEnd, r)
}

// lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
// just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}

// lexMapKeyStart consumes a key name up until the first non-whitespace
// character.
// lexMapKeyStart will ignore whitespace.
func lexMapKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case isKeySeparator(r):
		return lx.errorf("Unexpected key separator '%v'.", r)
	case r == arrayEnd:
		return lx.errorf("Unexpected array end '%v' processing map.", r)
	case unicode.IsSpace(r):
		lx.next()
		return lexSkip(lx, lexMapKeyStart)
	case r == mapEnd:
		lx.next()
		return lexSkip(lx, lexMapEnd)
	case r == commentHashStart:
		lx.next()
		lx.push(lexMapKeyStart)
		return lexCommentStart
	case r == commentSlashStart:
		lx.next()
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapKeyStart)
			return lexCommentStart
		}
		lx.backup()
	case r == sqStringStart:
		lx.next()
		return lexSkip(lx, lexMapQuotedKey)
	case r == dqStringStart:
		lx.next()
		return lexSkip(lx, lexMapDubQuotedKey)
	case r == eof:
		return lx.errorf("Unexpected EOF processing map.")
	}
	lx.ignore()
	lx.next()
	return lexMapKey
}

// lexMapQuotedKey consumes the text of a key between single quotes.
func lexMapQuotedKey(lx *lexer) stateFn {
	if r := lx.peek(); r == eof {
		return lx.errorf("Unexpected EOF processing quoted map key.")
	} else if r == sqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexMapKeyEnd)
	}
	lx.next()
	return lexMapQuotedKey
}

// lexMapDubQuotedKey consumes the text of a key between double quotes.
func lexMapDubQuotedKey(lx *lexer) stateFn {
	if r := lx.peek(); r == eof {
		return lx.errorf("Unexpected EOF processing double quoted map key.")
	} else if r == dqStringEnd {
		lx.emit(itemKey)
		lx.next()
		return lexSkip(lx, lexMapKeyEnd)
	}
	lx.next()
	return lexMapDubQuotedKey
}

// lexMapKey consumes the text of a key. Assumes that the first character (which
// is not whitespace) has already been consumed.
func lexMapKey(lx *lexer) stateFn {
	if r := lx.peek(); r == eof {
		return lx.errorf("Unexpected EOF processing map key.")
	} else if unicode.IsSpace(r) {
		// Spaces signal we could be looking at a keyword, e.g. include.
		// Keywords will eat the keyword and set the appropriate return stateFn.
		return lx.keyCheckKeyword(lexMapKeyEnd, lexMapValueEnd)
	} else if isKeySeparator(r) {
		lx.emit(itemKey)
		return lexMapKeyEnd
	}
	lx.next()
	return lexMapKey
}
// lexMapKeyEnd consumes the end of a key (up to the key separator).
// Assumes that the first whitespace character after a key (or the '='
// separator) has NOT been consumed.
func lexMapKeyEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexMapKeyEnd)
	case isKeySeparator(r):
		return lexSkip(lx, lexMapValue)
	}
	// We start the value here.
	lx.backup()
	return lexMapValue
}

// lexMapValue consumes one value in a map. It assumes that '{' or ','
// have already been consumed. All whitespace and new lines are ignored.
// Map values can be separated by ',' or simple NLs.
func lexMapValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexMapValue)
	case r == mapValTerm:
		return lx.errorf("Unexpected map value terminator %q.", mapValTerm)
	case r == mapEnd:
		return lexSkip(lx, lexMapEnd)
	}
	lx.backup()
	lx.push(lexMapValueEnd)
	return lexValue
}

// lexMapValueEnd consumes the cruft between values of a map. Namely,
// it ignores whitespace and expects either a ',' or a '}'.
func lexMapValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexMapValueEnd)
	case r == commentHashStart:
		lx.push(lexMapValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapValueEnd)
			return lexCommentStart
		}
		lx.backup()
		fallthrough
	case r == optValTerm || r == mapValTerm || isNL(r):
		return lexSkip(lx, lexMapKeyStart) // Move onto next
	case r == mapEnd:
		return lexSkip(lx, lexMapEnd)
	}
	return lx.errorf("Expected a map value terminator %q or a map "+
		"terminator %q, but got '%v' instead.", mapValTerm, mapEnd, r)
}

// lexMapEnd finishes the lexing of a map. It assumes that a '}' has
// just been consumed.
func lexMapEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemMapEnd)
	return lx.pop()
}

// isBool reports whether the unquoted string is actually a boolean.
func (lx *lexer) isBool() bool {
	str := strings.ToLower(lx.input[lx.start:lx.pos])
	return str == "true" || str == "false" ||
		str == "on" || str == "off" ||
		str == "yes" || str == "no"
}

// isVariable reports whether the unquoted string is a variable reference,
// starting with '$'.
func (lx *lexer) isVariable() bool {
	if lx.start >= len(lx.input) {
		return false
	}
	if lx.input[lx.start] == '$' {
		lx.start += 1
		return true
	}
	return false
}

// lexQuotedString consumes the inner contents of a single-quoted string. It
// assumes that the beginning '\'' has already been consumed and ignored. It
// will not interpret any internal contents.
func lexQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == sqStringEnd:
		lx.backup()
		lx.emit(itemString)
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	return lexQuotedString
}
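// exampleBoolsAndVars is an illustrative sample (not used by the lexer) of the
// unquoted values recognized by isBool and isVariable above: true/false,
// on/off and yes/no become itemBool items, and a '$'-prefixed bareword becomes
// an itemVariable item (resolved later by the parser). Quoting the value,
// e.g. "on", keeps it a plain string.
const exampleBoolsAndVars = `
trace on
debug = false
logtime: yes
PASS = "s3cr3t"
password = $PASS
`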
// lexDubQuotedString consumes the inner contents of a double-quoted string. It
// assumes that the beginning '"' has already been consumed and ignored. It
// handles escape sequences but does not otherwise interpret internal contents.
func lexDubQuotedString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '\\':
		lx.addCurrentStringPart(1)
		return lexStringEscape
	case r == dqStringEnd:
		lx.backup()
		lx.emitString()
		lx.next()
		lx.ignore()
		return lx.pop()
	case r == eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}
	return lexDubQuotedString
}

// lexString consumes the inner contents of a raw string.
func lexString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '\\':
		lx.addCurrentStringPart(1)
		return lexStringEscape
	// Termination of non-quoted strings
	case isNL(r) || r == eof || r == optValTerm ||
		r == arrayValTerm || r == arrayEnd || r == mapEnd ||
		isWhitespace(r):

		lx.backup()
		if lx.hasEscapedParts() {
			lx.emitString()
		} else if lx.isBool() {
			lx.emit(itemBool)
		} else if lx.isVariable() {
			lx.emit(itemVariable)
		} else {
			lx.emitString()
		}
		return lx.pop()
	case r == sqStringEnd:
		lx.backup()
		lx.emitString()
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexString
}

// lexBlock consumes the inner contents as a string. It assumes that the
// beginning '(' has already been consumed and ignored. It will continue
// processing until it finds a ')' on a new line by itself.
func lexBlock(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == blockEnd:
		lx.backup()
		lx.backup()

		// Looking for a ')' character on a line by itself, if the previous
		// character isn't a new line, then break so we keep processing the block.
		if lx.next() != '\n' {
			lx.next()
			break
		}
		lx.next()

		// Make sure the next character is a new line or an eof. We want a ')' on a
		// bare line by itself.
		switch lx.next() {
		case '\n', eof:
			lx.backup()
			lx.backup()
			lx.emit(itemString)
			lx.next()
			lx.ignore()
			return lx.pop()
		}
		lx.backup()
	case r == eof:
		return lx.errorf("Unexpected EOF processing block.")
	}
	return lexBlock
}

// lexStringEscape consumes an escaped character. It assumes that the preceding
// '\\' has already been consumed.
func lexStringEscape(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	case 'x':
		return lexStringBinary
	case 't':
		return lx.addStringPart("\t")
	case 'n':
		return lx.addStringPart("\n")
	case 'r':
		return lx.addStringPart("\r")
	case '"':
		return lx.addStringPart("\"")
	case '\\':
		return lx.addStringPart("\\")
	}
	return lx.errorf("Invalid escape character '%v'. Only the following "+
		"escape characters are allowed: \\xXX, \\t, \\n, \\r, \\\", \\\\.", r)
}
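// exampleBlockAndEscapes is an illustrative sample (not used by the lexer) of
// the two string forms handled above: a '(' ... ')' block, which lexBlock
// consumes verbatim until a ')' alone on its own line, and a double-quoted
// string, in which lexStringEscape and lexStringBinary (below) expand the
// \t, \n, \r, \", \\ and \xXX (two hex digits) escapes. The key names are
// made up for illustration.
const exampleBlockAndEscapes = `
motd = (
Welcome to the server.
This text is kept verbatim, new lines included.
)
greeting = "line one\nline two\t\x21"
`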
// lexStringBinary consumes two hexadecimal digits following '\x'. It assumes
// that the '\x' has already been consumed.
func lexStringBinary(lx *lexer) stateFn {
	r := lx.next()
	if isNL(r) {
		return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
	}
	r = lx.next()
	if isNL(r) {
		return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
	}
	offset := lx.pos - 2
	byteString, err := hex.DecodeString(lx.input[offset:lx.pos])
	if err != nil {
		return lx.errorf("Expected two hexadecimal digits after '\\x', but got '%s'", lx.input[offset:lx.pos])
	}
	lx.addStringPart(string(byteString))
	return lx.stringStateFn
}

// lexNumberOrDateOrStringOrIPStart consumes either a (positive) integer,
// a float, a datetime, an IP, or a string that starts with a number. It
// assumes that NO negative sign has been consumed; negative numbers are
// handled by lexNegNumberStart.
func lexNumberOrDateOrStringOrIPStart(lx *lexer) stateFn {
	r := lx.next()
	if !unicode.IsDigit(r) {
		if r == '.' {
			return lx.errorf("Floats must start with a digit, not '.'.")
		}
		return lx.errorf("Expected a digit but got '%v'.", r)
	}
	return lexNumberOrDateOrStringOrIP
}

// lexNumberOrDateOrStringOrIP consumes either a (positive) integer,
// float, datetime, IP or string without quotes that starts with a
// number.
func lexNumberOrDateOrStringOrIP(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == '-':
		if lx.pos-lx.start != 5 {
			return lx.errorf("All ISO8601 dates must be in full Zulu form.")
		}
		return lexDateAfterYear
	case unicode.IsDigit(r):
		return lexNumberOrDateOrStringOrIP
	case r == '.':
		// Assume float at first, but could be IP
		return lexFloatStart
	case isNumberSuffix(r):
		return lexConvenientNumber
	case !(isNL(r) || r == eof || r == mapEnd || r == optValTerm || r == mapValTerm || isWhitespace(r) || unicode.IsDigit(r)):
		// Treat it as a string value once we get a rune that
		// is not a number.
		lx.stringStateFn = lexString
		return lexString
	}
	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexConvenientNumber handles numbers with a convenience suffix, e.g. 1k or 1Mb.
func lexConvenientNumber(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == 'b' || r == 'B' || r == 'i' || r == 'I':
		return lexConvenientNumber
	}
	lx.backup()
	if isNL(r) || r == eof || r == mapEnd || r == optValTerm || r == mapValTerm || isWhitespace(r) || unicode.IsDigit(r) {
		lx.emit(itemInteger)
		return lx.pop()
	}
	// This is not a number, so treat it as a string.
	lx.stringStateFn = lexString
	return lexString
}

// lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
// It assumes that "YYYY-" has already been consumed.
func lexDateAfterYear(lx *lexer) stateFn {
	formats := []rune{
		// digits are '0'.
		// everything else is direct equality.
		'0', '0', '-', '0', '0',
		'T',
		'0', '0', ':', '0', '0', ':', '0', '0',
		'Z',
	}
	for _, f := range formats {
		r := lx.next()
		if f == '0' {
			if !unicode.IsDigit(r) {
				return lx.errorf("Expected digit in ISO8601 datetime, "+
					"but found '%v' instead.", r)
			}
		} else if f != r {
			return lx.errorf("Expected '%v' in ISO8601 datetime, "+
				"but found '%v' instead.", f, r)
		}
	}
	lx.emit(itemDatetime)
	return lx.pop()
}
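// exampleNumbers is an illustrative sample (not used by the lexer) of the
// numeric forms handled above: plain integers, convenience size suffixes such
// as k/K, m/M, g/G optionally followed by b/B or ib/IB (e.g. 1k, 1MB, 4GiB),
// and full Zulu ISO8601 datetimes as consumed by lexDateAfterYear. The key
// names are made up for illustration.
const exampleNumbers = `
max_conns = 64k
max_payload: 1MB
cache 4GiB
not_before 2016-05-04T18:53:41Z
`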
// lexNegNumberStart consumes either an integer or a float. It assumes that a
// negative sign has already been read, but that *no* digits have been consumed.
// lexNegNumberStart will move to the appropriate integer or float states.
func lexNegNumberStart(lx *lexer) stateFn {
	// we MUST see a digit. Even floats have to start with a digit.
	r := lx.next()
	if !unicode.IsDigit(r) {
		if r == '.' {
			return lx.errorf("Floats must start with a digit, not '.'.")
		}
		return lx.errorf("Expected a digit but got '%v'.", r)
	}
	return lexNegNumber
}

// lexNegNumber consumes a negative integer or a float after seeing the first digit.
func lexNegNumber(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsDigit(r):
		return lexNegNumber
	case r == '.':
		return lexFloatStart
	case isNumberSuffix(r):
		return lexConvenientNumber
	}
	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexFloatStart starts the consumption of digits of a float after a '.'.
// Namely, at least one digit is required.
func lexFloatStart(lx *lexer) stateFn {
	r := lx.next()
	if !unicode.IsDigit(r) {
		return lx.errorf("Floats must have a digit after the '.', but got "+
			"'%v' instead.", r)
	}
	return lexFloat
}

// lexFloat consumes the digits of a float after a '.'.
// Assumes that one digit has been consumed after a '.' already.
func lexFloat(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsDigit(r) {
		return lexFloat
	}

	// Not a digit. If it's another '.', we need to see if we falsely assumed a float.
	if r == '.' {
		return lexIPAddr
	}

	lx.backup()
	lx.emit(itemFloat)
	return lx.pop()
}

// lexIPAddr consumes IP addresses with optional ports, like 127.0.0.1:4222.
func lexIPAddr(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsDigit(r) || r == '.' || r == ':' || r == '-' {
		return lexIPAddr
	}
	lx.backup()
	lx.emit(itemString)
	return lx.pop()
}

// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}

// lexComment lexes an entire comment. It assumes that the comment prefix
// ('#' or '//') has already been consumed.
// It will consume *up to* the first new line character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
	r := lx.peek()
	if isNL(r) || r == eof {
		lx.emit(itemText)
		return lx.pop()
	}
	lx.next()
	return lexComment
}

// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
	return func(lx *lexer) stateFn {
		lx.ignore()
		return nextState
	}
}

// isNumberSuffix reports whether r is a convenience number suffix.
func isNumberSuffix(r rune) bool {
	return r == 'k' || r == 'K' || r == 'm' || r == 'M' || r == 'g' || r == 'G' || r == 't' || r == 'T' || r == 'p' || r == 'P' || r == 'e' || r == 'E'
}

// isKeySeparator reports whether r is one of the two key separators.
func isKeySeparator(r rune) bool {
	return r == keySepEqual || r == keySepColon
}
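// exampleAddrsAndFloats is an illustrative sample (not used by the lexer)
// showing how unquoted values that start with digits are disambiguated: a
// single '.' after digits is first treated as a float (lexFloat above), and a
// second '.' reroutes to lexIPAddr, which emits host:port forms such as
// 127.0.0.1:4222 as plain strings. The key names are made up for illustration.
const exampleAddrsAndFloats = `
timeout = 2.5
listen = 127.0.0.1:4222
`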
// isWhitespace returns true if `r` is a whitespace character according
// to the spec.
func isWhitespace(r rune) bool {
	return r == '\t' || r == ' '
}

func isNL(r rune) bool {
	return r == '\n' || r == '\r'
}

func (itype itemType) String() string {
	switch itype {
	case itemError:
		return "Error"
	case itemNIL:
		return "NIL"
	case itemEOF:
		return "EOF"
	case itemText:
		return "Text"
	case itemString:
		return "String"
	case itemBool:
		return "Bool"
	case itemInteger:
		return "Integer"
	case itemFloat:
		return "Float"
	case itemDatetime:
		return "DateTime"
	case itemKey:
		return "Key"
	case itemArrayStart:
		return "ArrayStart"
	case itemArrayEnd:
		return "ArrayEnd"
	case itemMapStart:
		return "MapStart"
	case itemMapEnd:
		return "MapEnd"
	case itemCommentStart:
		return "CommentStart"
	case itemVariable:
		return "Variable"
	case itemInclude:
		return "Include"
	}
	// Use the numeric value here; calling itype.String() again would recurse
	// forever for an unknown type.
	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}

func (item item) String() string {
	return fmt.Sprintf("(%s, '%s', %d, %d)", item.typ.String(), item.val, item.line, item.pos)
}

func escapeSpecial(c rune) string {
	switch c {
	case '\n':
		return "\\n"
	}
	return string(c)
}