cuelang.org/go@v0.10.1/cue/scanner/scanner.go

// Copyright 2018 The CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package scanner implements a scanner for CUE source text. It takes a []byte
// as source which can then be tokenized through repeated calls to the Scan
// method.
package scanner

import (
	"bytes"
	"fmt"
	"path/filepath"
	"strconv"
	"unicode"
	"unicode/utf8"

	"cuelang.org/go/cue/token"
)

// An ErrorHandler is a generic error handler used throughout CUE packages.
//
// The position points to the beginning of the offending value.
type ErrorHandler func(pos token.Pos, msg string, args []interface{})

// A Scanner holds the Scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use.
type Scanner struct {
	// immutable state
	file *token.File  // source file handle
	dir  string       // directory portion of file.Name()
	src  []byte       // source
	errh ErrorHandler // error reporting; or nil
	mode Mode         // scanning mode

	// scanning state
	ch              rune // current character
	offset          int  // character offset
	rdOffset        int  // reading offset (position after current character)
	lineOffset      int  // current line offset
	linesSinceLast  int
	spacesSinceLast int
	insertEOL       bool // insert a comma before next newline

	quoteStack []quoteInfo

	// public state - ok to modify
	ErrorCount int // number of errors encountered
}

type quoteInfo struct {
	char    rune
	numChar int
	numHash int
}

const bom = 0xFEFF // byte order mark, only permitted as very first character

// Read the next Unicode char into s.ch.
// s.ch < 0 means end-of-file.
func (s *Scanner) next() {
	if s.rdOffset < len(s.src) {
		s.offset = s.rdOffset
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		r, w := rune(s.src[s.rdOffset]), 1
		switch {
		case r == 0:
			s.errf(s.offset, "illegal character NUL")
		case r >= utf8.RuneSelf:
			// not ASCII
			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
			if r == utf8.RuneError && w == 1 {
				s.errf(s.offset, "illegal UTF-8 encoding")
			} else if r == bom && s.offset > 0 {
				s.errf(s.offset, "illegal byte order mark")
			}
		}
		s.rdOffset += w
		s.ch = r
	} else {
		s.offset = len(s.src)
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		s.ch = -1 // eof
	}
}

// A Mode value is a set of flags (or 0).
// They control scanner behavior.
type Mode uint

// These constants are options to the Init function.
const (
	ScanComments     Mode = 1 << iota // return comments as COMMENT tokens
	DontInsertCommas                  // do not automatically insert commas
)

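// collectErrors is an illustrative sketch, not used by the scanner itself, of
// an ErrorHandler as accepted by Init below: it renders each message with its
// arguments and appends it to a caller-provided slice. The function name and
// the use of %v to format the position are assumptions of this example only.
func collectErrors(errs *[]string) ErrorHandler {
	return func(pos token.Pos, msg string, args []interface{}) {
		*errs = append(*errs, fmt.Sprintf("%v: %s", pos, fmt.Sprintf(msg, args...)))
	}
}
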
// Init prepares the scanner s to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same source, as
// line information which is already present is ignored. Init causes a
// panic if the file size does not match the src size.
//
// Calls to Scan will invoke the error handler eh if they encounter a
// syntax error and eh is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments are handled.
//
// Note that Init may call eh if there is an error in the first character
// of the file.
func (s *Scanner) Init(file *token.File, src []byte, eh ErrorHandler, mode Mode) {
	// Explicitly initialize all fields since a scanner may be reused.
	if file.Size() != len(src) {
		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
	}
	s.file = file
	s.dir, _ = filepath.Split(file.Name())
	s.src = src
	s.errh = eh
	s.mode = mode

	s.ch = ' '
	s.offset = 0
	s.rdOffset = 0
	s.lineOffset = 0
	s.insertEOL = false
	s.ErrorCount = 0

	s.next()
	if s.ch == bom {
		s.next() // ignore BOM at file beginning
	}
}

func (s *Scanner) errf(offs int, msg string, args ...interface{}) {
	if s.errh != nil {
		s.errh(s.file.Pos(offs, 0), msg, args)
	}
	s.ErrorCount++
}

var prefix = []byte("//line ")

func (s *Scanner) interpretLineComment(text []byte) {
	if bytes.HasPrefix(text, prefix) {
		// get filename and line number, if any
		if i := bytes.LastIndex(text, []byte{':'}); i > 0 {
			if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
				// valid //line filename:line comment
				filename := string(bytes.TrimSpace(text[len(prefix):i]))
				if filename != "" {
					filename = filepath.Clean(filename)
					if !filepath.IsAbs(filename) {
						// make filename relative to current directory
						filename = filepath.Join(s.dir, filename)
					}
				}
				// update scanner position
				s.file.AddLineInfo(s.lineOffset+len(text)+1, filename, line) // +len(text)+1 since comment applies to next line
			}
		}
	}
}

func (s *Scanner) scanComment() string {
	// initial '/' already consumed; s.ch == '/'
	offs := s.offset - 1 // position of initial '/'
	hasCR := false

	if s.ch == '/' {
		//-style comment
		s.next()
		for s.ch != '\n' && s.ch >= 0 {
			if s.ch == '\r' {
				hasCR = true
			}
			s.next()
		}
		if offs == s.lineOffset {
			// comment starts at the beginning of the current line
			s.interpretLineComment(s.src[offs:s.offset])
		}
		goto exit
	}

	s.errf(offs, "comment not terminated")

exit:
	lit := s.src[offs:s.offset]
	if hasCR {
		// TODO: preserve \r\n
		lit = stripCR(lit)
	}

	return string(lit)
}

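// lineDirectiveDemo is an illustrative sketch, not used by the package, of the
// //line handling implemented by interpretLineComment above: when a comment of
// the form "//line file:line" starts at the beginning of a line, positions on
// the following lines are remapped to that file and line. The function name,
// the source text, and the assumption that token.NewFile takes a go/token-style
// (filename, base, size) signature are all specific to this example.
func lineDirectiveDemo() token.Pos {
	src := []byte("a: 1\n//line other.cue:100\nb: 2\n")
	var s Scanner
	s.Init(token.NewFile("demo.cue", 0, len(src)), src, nil, ScanComments)
	for {
		pos, tok, lit := s.Scan()
		if tok == token.EOF {
			break
		}
		if tok == token.IDENT && lit == "b" {
			return pos // reported as other.cue:100 because of the directive
		}
	}
	return token.NoPos
}
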
func isLetter(ch rune) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
}

func isDigit(ch rune) bool {
	// TODO(mpvl): Is this correct?
	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
}

func (s *Scanner) scanFieldIdentifier() string {
	offs := s.offset
	if s.ch == '_' {
		s.next()
	}
	if s.ch == '#' {
		s.next()
		// TODO: remove this block to allow #<num>
		if isDigit(s.ch) {
			return string(s.src[offs:s.offset])
		}
	}
	for isLetter(s.ch) || isDigit(s.ch) || s.ch == '_' || s.ch == '$' {
		s.next()
	}
	return string(s.src[offs:s.offset])
}

func (s *Scanner) scanIdentifier() string {
	offs := s.offset
	for isLetter(s.ch) || isDigit(s.ch) || s.ch == '_' || s.ch == '$' {
		s.next()
	}
	return string(s.src[offs:s.offset])
}

func digitVal(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case ch == '_':
		return 0
	case 'a' <= ch && ch <= 'f':
		return int(ch - 'a' + 10)
	case 'A' <= ch && ch <= 'F':
		return int(ch - 'A' + 10)
	}
	return 16 // larger than any legal digit val
}

func (s *Scanner) scanMantissa(base int) {
	var last rune
	for digitVal(s.ch) < base {
		if last == '_' && s.ch == '_' {
			s.errf(s.offset, "illegal '_' in number")
		}
		last = s.ch
		s.next()
	}
	if last == '_' {
		s.errf(s.offset-1, "illegal '_' in number")
	}
}

func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) {
	// digitVal(s.ch) < 10
	offs := s.offset
	tok := token.INT

	if seenDecimalPoint {
		offs--
		tok = token.FLOAT
		s.scanMantissa(10)
		goto exponent
	}

	if s.ch == '0' {
		// int or float
		offs := s.offset
		s.next()
		if s.ch == 'x' || s.ch == 'X' {
			// hexadecimal int
			s.next()
			s.scanMantissa(16)
			if s.offset-offs <= 2 {
				// only scanned "0x" or "0X"
				s.errf(offs, "illegal hexadecimal number")
			}
		} else if s.ch == 'b' {
			// binary int
			s.next()
			s.scanMantissa(2)
			if s.offset-offs <= 2 {
				// only scanned "0b"
				s.errf(offs, "illegal binary number")
			}
		} else if s.ch == 'o' {
			// octal int
			s.next()
			s.scanMantissa(8)
			if s.offset-offs <= 2 {
				// only scanned "0o"
				s.errf(offs, "illegal octal number")
			}
		} else {
			// 0 or float
			seenDigits := false
			if s.ch >= '0' && s.ch <= '9' {
				seenDigits = true
				s.scanMantissa(10)
			}
			if s.ch == '.' || s.ch == 'e' || s.ch == 'E' {
				goto fraction
			}
			if seenDigits {
				// integer other than 0 may not start with 0
				s.errf(offs, "illegal integer number")
			}
		}
		goto exit
	}

	// decimal int or float
	s.scanMantissa(10)

	// TODO: allow 3h4s, etc.
	// switch s.ch {
	// case 'h', 'm', 's', "µ"[0], 'u', 'n':
	// }

fraction:
	if s.ch == '.' {
		if p := s.offset + 1; p < len(s.src) && s.src[p] == '.' {
			// interpret dot as part of a range.
			goto exit
		}
		tok = token.FLOAT
		s.next()
		s.scanMantissa(10)
	}

exponent:
	switch s.ch {
	case 'K', 'M', 'G', 'T', 'P':
		tok = token.INT // TODO: Or should we allow this to be a float?
		s.next()
		if s.ch == 'i' {
			s.next()
		}
		goto exit
	}

	if s.ch == 'e' || s.ch == 'E' {
		tok = token.FLOAT
		s.next()
		if s.ch == '-' || s.ch == '+' {
			s.next()
		}
		s.scanMantissa(10)
	}

exit:
	return tok, string(s.src[offs:s.offset])
}

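// numberTokensDemo is an illustrative sketch, not used by the package, of the
// literal forms recognized by scanNumber above: hexadecimal (0x1F), binary
// (0b101), octal (0o17), floats with exponents (1.5e3), integers with a
// K/M/G/T/P multiplier and optional trailing 'i' (3Ki), and '_' digit
// separators (1_000). It returns the token kind scanned for each input. The
// function name and inputs are hypothetical, and token.NewFile is assumed to
// take a (filename, base, size) signature.
func numberTokensDemo() map[string]token.Token {
	out := map[string]token.Token{}
	for _, src := range []string{"0x1F", "0b101", "0o17", "1.5e3", "3Ki", "1_000"} {
		var s Scanner
		s.Init(token.NewFile("num.cue", 0, len(src)), []byte(src), nil, 0)
		_, tok, _ := s.Scan()
		out[src] = tok // e.g. token.INT for "0x1F", token.FLOAT for "1.5e3"
	}
	return out
}
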
// scanEscape parses an escape sequence where quote describes the accepted
// escaped quote. In case of a syntax error, it stops at the offending
// character (without consuming it) and returns false. Otherwise
// it returns true.
//
// Must be compliant with https://tools.ietf.org/html/rfc4627.
func (s *Scanner) scanEscape(quote quoteInfo) (ok, interpolation bool) {
	for range quote.numHash {
		if s.ch != '#' {
			return true, false
		}
		s.next()
	}

	offs := s.offset

	var n int
	var base, max uint32
	switch s.ch {
	case '(':
		return true, true
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '/', quote.char:
		s.next()
		return true, false
	case '0', '1', '2', '3', '4', '5', '6', '7':
		n, base, max = 3, 8, 255
	case 'x':
		s.next()
		n, base, max = 2, 16, 255
	case 'u':
		s.next()
		n, base, max = 4, 16, unicode.MaxRune
	case 'U':
		s.next()
		n, base, max = 8, 16, unicode.MaxRune
	default:
		msg := "unknown escape sequence"
		if s.ch < 0 {
			msg = "escape sequence not terminated"
		}
		s.errf(offs, msg)
		return false, false
	}

	var x uint32
	for n > 0 {
		d := uint32(digitVal(s.ch))
		if d >= base {
			if s.ch < 0 {
				s.errf(s.offset, "escape sequence not terminated")
			} else {
				s.errf(s.offset, "illegal character %#U in escape sequence", s.ch)
			}
			return false, false
		}
		x = x*base + d
		s.next()
		n--
	}

	// TODO: this is valid JSON, so remove, but normalize and report an error
	// for unmatched surrogate pairs.
	if x > max {
		s.errf(offs, "escape sequence is invalid Unicode code point")
		return false, false
	}

	return true, false
}

func (s *Scanner) scanString(offs int, quote quoteInfo) (token.Token, string) {
	// ", """, ', or ''' opening already consumed

	tok := token.STRING

	hasCR := false
	extra := 0
	for {
		ch := s.ch
		if (quote.numChar != 3 && ch == '\n') || ch < 0 {
			s.errf(offs, "string literal not terminated")
			lit := s.src[offs:s.offset]
			if hasCR {
				lit = stripCR(lit)
			}
			return tok, string(lit)
		}

		s.next()
		ch, ok := s.consumeStringClose(ch, quote)
		if ok {
			break
		}
		if ch == '\r' && quote.numChar == 3 {
			hasCR = true
		}
		if ch == '\\' {
			if _, interpolation := s.scanEscape(quote); interpolation {
				tok = token.INTERPOLATION
				extra = 1
				s.quoteStack = append(s.quoteStack, quote)
				break
			}
		}
	}
	lit := s.src[offs : s.offset+extra]
	if hasCR {
		lit = stripCR(lit)
	}
	return tok, string(lit)
}

func (s *Scanner) consumeQuotes(quote rune, max int) (next rune, n int) {
	for ; n < max; n++ {
		if s.ch != quote {
			return s.ch, n
		}
		s.next()
	}
	return s.ch, n
}

func (s *Scanner) consumeStringClose(ch rune, quote quoteInfo) (next rune, atEnd bool) {
	if quote.char != ch {
		return ch, false
	}
	numChar := quote.numChar
	n := numChar + quote.numHash
	want := quote.char
	for i := 1; i < n; i++ {
		if i == numChar {
			want = '#'
		}
		if want != s.ch {
			return ch, false
		}
		ch = s.ch
		s.next()
	}
	return s.ch, true
}

func (s *Scanner) scanHashes(maxHash int) int {
	for i := range maxHash {
		if s.ch != '#' {
			return i
		}
		s.next()
	}
	return maxHash
}

func stripCR(b []byte) []byte {
	c := make([]byte, len(b))
	i := 0
	for _, ch := range b {
		if ch != '\r' {
			c[i] = ch
			i++
		}
	}
	return c[:i]
}

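// stringTokensDemo is an illustrative sketch, not used by the package, of the
// quoting forms handled by scanString and its helpers above: plain strings,
// hash-delimited strings whose inner quotes need no escaping (and whose escapes
// must carry a matching number of '#'s), and triple-quoted multiline strings
// whose opening quotes must be followed by a newline. Each input scans to a
// single STRING token; the literal text includes the surrounding quotes and
// hashes. The function name, inputs, and token.NewFile signature are
// assumptions of this example.
func stringTokensDemo() []string {
	srcs := []string{
		`"hello"`,                      // simple double-quoted string
		`#"a "quoted" word"#`,          // hash form: inner quotes are part of the string
		"\"\"\"\nmulti\nline\n\"\"\"",  // multiline string: """ followed by a newline
	}
	var lits []string
	for _, src := range srcs {
		var s Scanner
		s.Init(token.NewFile("str.cue", 0, len(src)), []byte(src), nil, 0)
		_, _, lit := s.Scan()
		lits = append(lits, lit)
	}
	return lits
}
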
// scanAttribute scans a full attribute of the form @foo(str). An attribute
// is a lexical entry and as such whitespace is treated as normal characters
// within the attribute.
func (s *Scanner) scanAttribute() (tok token.Token, lit string) {
	offs := s.offset - 1 // @ already consumed

	s.scanIdentifier()

	if _, tok, _ := s.Scan(); tok == token.LPAREN {
		s.scanAttributeTokens(token.RPAREN)
	} else {
		s.errf(s.offset, "invalid attribute: expected '('")
	}
	return token.ATTRIBUTE, string(s.src[offs:s.offset])
}

func (s *Scanner) scanAttributeTokens(close token.Token) {
	for {
		switch _, tok, _ := s.Scan(); tok {
		case close:
			return
		case token.EOF:
			s.errf(s.offset, "attribute missing '%s'", close)
			return

		case token.INTERPOLATION:
			s.errf(s.offset, "interpolation not allowed in attribute")
			s.popInterpolation()
			s.recoverParen(1)
		case token.LPAREN:
			s.scanAttributeTokens(token.RPAREN)
		case token.LBRACE:
			s.scanAttributeTokens(token.RBRACE)
		case token.LBRACK:
			s.scanAttributeTokens(token.RBRACK)
		case token.RPAREN, token.RBRACK, token.RBRACE:
			s.errf(s.offset, "unexpected '%s'", tok)
		}
	}
}

// recoverParen is an approximate recovery mechanism to recover from invalid
// attributes.
func (s *Scanner) recoverParen(open int) {
	for {
		switch s.ch {
		case '\n', -1:
			return
		case '(':
			open++
		case ')':
			if open--; open == 0 {
				return
			}
		}
		s.next()
	}
}

func (s *Scanner) skipWhitespace(inc int) {
	for {
		switch s.ch {
		case ' ', '\t':
			s.spacesSinceLast += inc
		case '\n':
			s.linesSinceLast += inc
			if s.insertEOL {
				return
			}
		case '\r':
		default:
			return
		}
		s.next()
	}
}

// switch2 is a helper for scanning two-character tokens such as <= and >=:
// if the next character is '=' it is consumed and tok1 is returned;
// otherwise tok0 is returned.
func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	return tok0
}

func (s *Scanner) popInterpolation() quoteInfo {
	quote := s.quoteStack[len(s.quoteStack)-1]
	s.quoteStack = s.quoteStack[:len(s.quoteStack)-1]
	return quote
}

// ResumeInterpolation resumes scanning of a string interpolation.
func (s *Scanner) ResumeInterpolation() string {
	quote := s.popInterpolation()
	_, str := s.scanString(s.offset-1, quote)
	return str
}

// Offset returns the current position offset.
func (s *Scanner) Offset() int {
	return s.offset
}

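// interpolationDemo is an illustrative sketch, not used by the package, of how
// a caller is expected to drive Scan and ResumeInterpolation: when Scan returns
// an INTERPOLATION token, the literal ends with `\(` and the scanner is
// positioned at the '('. The caller tokenizes the embedded expression with
// further Scan calls and, once the matching ')' has been scanned, calls
// ResumeInterpolation to continue the enclosing string literal. The function
// name, source text, and token.NewFile signature are assumptions of this example.
func interpolationDemo() []string {
	src := []byte(`"a \(x) b"`)
	var s Scanner
	s.Init(token.NewFile("interp.cue", 0, len(src)), src, nil, 0)
	_, tok, lit := s.Scan() // INTERPOLATION; lit is `"a \(`
	parts := []string{lit}
	for tok != token.RPAREN { // LPAREN, IDENT "x", then the closing RPAREN
		_, tok, _ = s.Scan()
	}
	parts = append(parts, s.ResumeInterpolation()) // remainder of the string: `) b"`
	return parts
}
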
// Scan scans the next token and returns the token position, the token,
// and its literal string if applicable. The source end is indicated by
// EOF.
//
// If the returned token is a literal (IDENT, INT, FLOAT,
// IMAG, CHAR, STRING) or COMMENT, the literal string
// has the corresponding value.
//
// If the returned token is a keyword, the literal string is the keyword.
//
// If the returned token is COMMA, the corresponding
// literal string is "," if the comma was present in the source,
// and "\n" if the comma was inserted because of a newline or
// at EOF.
//
// If the returned token is ILLEGAL, the literal string is the
// offending character.
//
// In all other cases, Scan returns an empty literal string.
//
// For more tolerant parsing, Scan will return a valid token if
// possible even if a syntax error was encountered. Thus, even
// if the resulting token sequence contains no illegal tokens,
// a client may not assume that no error occurred. Instead it
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
// Scan adds line information to the file added to the file
// set with Init. Token positions are relative to that file
// and thus relative to the file set.
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
	s.skipWhitespace(1)

	var rel token.RelPos
	switch {
	case s.linesSinceLast > 1:
		rel = token.NewSection
	case s.linesSinceLast == 1:
		rel = token.Newline
	case s.spacesSinceLast > 0:
		rel = token.Blank
	default:
		rel = token.NoSpace
	}
	// current token start
	offset := s.offset
	pos = s.file.Pos(offset, rel)

	// determine token value
	insertEOL := false
	var quote quoteInfo
	switch ch := s.ch; {
	case '0' <= ch && ch <= '9':
		insertEOL = true
		tok, lit = s.scanNumber(false)
	case isLetter(ch), ch == '$', ch == '#':
		lit = s.scanFieldIdentifier()
		if len(lit) > 1 {
			// keywords are longer than one letter - avoid lookup otherwise
			tok = token.Lookup(lit)
			insertEOL = true
			break
		}
		if ch != '#' || (s.ch != '\'' && s.ch != '"' && s.ch != '#') {
			tok = token.IDENT
			insertEOL = true
			break
		}
		quote.numHash = 1
		ch = s.ch
		fallthrough
	default:
		s.next() // always make progress
		switch ch {
		case -1:
			if s.insertEOL {
				s.insertEOL = false // EOF consumed
				return s.file.Pos(offset, token.Elided), token.COMMA, "\n"
			}
			tok = token.EOF
		case '_':
			if s.ch == '|' {
				// Unconditionally require this to be followed by another
				// underscore to avoid needing an extra lookahead.
				// Note that `_|x` is always equal to _.
				s.next()
				if s.ch != '_' {
					s.errf(s.file.Offset(pos), "illegal token '_|'; expected '_'")
					insertEOL = s.insertEOL // preserve insertEOL info
					tok = token.ILLEGAL
					lit = "_|"
					break
				}
				s.next()
				tok = token.BOTTOM
				lit = "_|_"
			} else {
				tok = token.IDENT
				lit = "_" + s.scanFieldIdentifier()
			}
			insertEOL = true

		case '\n':
			// we only reach here if s.insertEOL was
			// set in the first place and exited early
			// from s.skipWhitespace()
			s.insertEOL = false // newline consumed
			p := s.file.Pos(offset, token.Elided)
			s.skipWhitespace(1)
			// Don't elide comma before a ',' or ':' to ensure JSON
			// conformance. Note that cue fmt should immediately undo those.
			if s.ch == ',' || s.ch == ':' {
				return s.Scan()
			}
			return p, token.COMMA, "\n"

		case '#':
			for quote.numHash++; s.ch == '#'; quote.numHash++ {
				s.next()
			}
			ch = s.ch
			if ch != '\'' && ch != '"' {
				break
			}
			s.next()
			fallthrough
		case '"', '\'':
			insertEOL = true
			quote.char = ch
			quote.numChar = 1
			offs := s.offset - 1 - quote.numHash
			switch _, n := s.consumeQuotes(ch, 2); n {
			case 0:
				quote.numChar = 1
				tok, lit = s.scanString(offs, quote)
			case 1:
				// When the string is surrounded by hashes,
				// a single leading quote is OK (and part of the string)
				// e.g. #""hello""#
				// unless it's succeeded by the correct number of terminating
				// hash characters
				// e.g. ##""##
				if n := s.scanHashes(quote.numHash); n == quote.numHash {
					// It's the empty string.
					tok, lit = token.STRING, string(s.src[offs:s.offset])
				} else {
					tok, lit = s.scanString(offs, quote)
				}
			case 2:
				quote.numChar = 3
				switch s.ch {
				case '\n':
					s.next()
					tok, lit = s.scanString(offs, quote)
				case '\r':
					s.next()
					if s.ch == '\n' {
						s.next()
						tok, lit = s.scanString(offs, quote)
						break
					}
					fallthrough
				default:
					s.errf(offs, "expected newline after multiline quote %s",
						s.src[offs:s.offset])
					tok, lit = token.STRING, string(s.src[offs:s.offset])
				}
			}
		case '@':
			insertEOL = true
			tok, lit = s.scanAttribute()
		case ':':
			tok = token.COLON
		case ';':
			tok = token.SEMICOLON
			insertEOL = true
		case '?':
			tok = token.OPTION
			insertEOL = true
		case '.':
			if '0' <= s.ch && s.ch <= '9' {
				insertEOL = true
				tok, lit = s.scanNumber(true)
			} else if s.ch == '.' {
				s.next()
				if s.ch == '.' {
					s.next()
					tok = token.ELLIPSIS
					insertEOL = true
				} else {
					s.errf(s.file.Offset(pos), "illegal token '..'; expected '.'")
				}
			} else {
				tok = token.PERIOD
			}
		case ',':
			tok = token.COMMA
			lit = ","
		case '(':
			tok = token.LPAREN
		case ')':
			insertEOL = true
			tok = token.RPAREN
		case '[':
			tok = token.LBRACK
		case ']':
			insertEOL = true
			tok = token.RBRACK
		case '{':
			tok = token.LBRACE
		case '}':
			insertEOL = true
			tok = token.RBRACE
		case '+':
			tok = token.ADD // Consider ++ for list concatenate.
		case '-':
			tok = token.SUB
		case '*':
			tok = token.MUL
		case '/':
			if s.ch == '/' {
				// comment
				if s.insertEOL {
					// reset position to the beginning of the comment
					s.ch = '/'
					s.offset = s.file.Offset(pos)
					s.rdOffset = s.offset + 1
					s.insertEOL = false // newline consumed
					return s.file.Pos(offset, token.Elided), token.COMMA, "\n"
				}
				comment := s.scanComment()
				if s.mode&ScanComments == 0 {
					// skip comment
					s.insertEOL = false // newline consumed
					goto scanAgain
				}
				tok = token.COMMENT
				lit = comment
			} else {
				tok = token.QUO
			}
		// We no longer use %, but seems like a useful token to use for
		// something else at some point.
		// case '%':
		case '<':
			if s.ch == '-' {
				s.next()
				tok = token.ARROW
			} else {
				tok = s.switch2(token.LSS, token.LEQ)
			}
		case '>':
			tok = s.switch2(token.GTR, token.GEQ)
		case '=':
			if s.ch == '~' {
				s.next()
				tok = token.MAT
			} else {
				tok = s.switch2(token.BIND, token.EQL)
			}
		case '!':
			if s.ch == '~' {
				s.next()
				tok = token.NMAT
			} else {
				tok = s.switch2(token.NOT, token.NEQ)
			}
		case '&':
			switch s.ch {
			case '&':
				s.next()
				tok = token.LAND
			default:
				tok = token.AND
			}
		case '|':
			if s.ch == '|' {
				s.next()
				tok = token.LOR
			} else {
				tok = token.OR
			}
		default:
			// next reports unexpected BOMs - don't repeat
			if ch != bom {
				s.errf(s.file.Offset(pos), "illegal character %#U", ch)
			}
			insertEOL = s.insertEOL // preserve insertEOL info
			tok = token.ILLEGAL
			lit = string(ch)
		}
	}
	if s.mode&DontInsertCommas == 0 {
		s.insertEOL = insertEOL
	}

	s.linesSinceLast = 0
	s.spacesSinceLast = 0
	return
}
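
// tokenizeDemo is an illustrative sketch, not part of the package API, of the
// Init/Scan loop described in the package and Scan documentation above: it
// scans src until EOF, collecting tokens and any error messages reported
// through the handler (here the collectErrors sketch defined near the top of
// this file). The function name, the file name "demo.cue", and the assumed
// (filename, base, size) signature of token.NewFile belong to this example only.
func tokenizeDemo(src []byte) (toks []token.Token, errs []string) {
	var s Scanner
	s.Init(token.NewFile("demo.cue", 0, len(src)), src, collectErrors(&errs), ScanComments)
	for {
		_, tok, _ := s.Scan()
		if tok == token.EOF {
			break
		}
		toks = append(toks, tok)
	}
	return toks, errs
}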