// Copyright 2018 The CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package scanner implements a scanner for CUE source text. It takes a []byte
// as source which can then be tokenized through repeated calls to the Scan
// method.
package scanner

import (
	"fmt"
	"path/filepath"
	"unicode"
	"unicode/utf8"

	"cuelang.org/go/cue/token"
)

// An ErrorHandler is a generic error handler used throughout CUE packages.
//
// The position points to the beginning of the offending value. The scanner
// hands msg over as a format string together with its operands in args (for
// example "illegal character %#U" with the offending rune); the message is
// not formatted before the handler is called.
type ErrorHandler func(pos token.Pos, msg string, args []interface{})

// A Scanner holds the Scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use.
37 type Scanner struct { 38 // immutable state 39 file *token.File // source file handle 40 dir string // directory portion of file.Name() 41 src []byte // source 42 errh ErrorHandler // error reporting; or nil 43 mode Mode // scanning mode 44 45 // scanning state 46 ch rune // current character 47 offset int // character offset 48 rdOffset int // reading offset (position after current character) 49 linesSinceLast int 50 spacesSinceLast int 51 insertEOL bool // insert a comma before next newline 52 53 quoteStack []quoteInfo 54 55 // public state - ok to modify 56 ErrorCount int // number of errors encountered 57 } 58 59 type quoteInfo struct { 60 char rune 61 numChar int 62 numHash int 63 } 64 65 const bom = 0xFEFF // byte order mark, only permitted as very first character 66 67 // Read the next Unicode char into s.ch. 68 // s.ch < 0 means end-of-file. 69 func (s *Scanner) next() { 70 if s.rdOffset < len(s.src) { 71 s.offset = s.rdOffset 72 if s.ch == '\n' { 73 s.file.AddLine(s.offset) 74 } 75 r, w := rune(s.src[s.rdOffset]), 1 76 switch { 77 case r == 0: 78 s.errf(s.offset, "illegal character NUL") 79 case r >= utf8.RuneSelf: 80 // not ASCII 81 r, w = utf8.DecodeRune(s.src[s.rdOffset:]) 82 if r == utf8.RuneError && w == 1 { 83 s.errf(s.offset, "illegal UTF-8 encoding") 84 } else if r == bom && s.offset > 0 { 85 s.errf(s.offset, "illegal byte order mark") 86 } 87 } 88 s.rdOffset += w 89 s.ch = r 90 } else { 91 s.offset = len(s.src) 92 if s.ch == '\n' { 93 s.file.AddLine(s.offset) 94 } 95 s.ch = -1 // eof 96 } 97 } 98 99 // A Mode value is a set of flags (or 0). 100 // They control scanner behavior. 101 type Mode uint 102 103 // These constants are options to the Init function. 104 const ( 105 ScanComments Mode = 1 << iota // return comments as COMMENT tokens 106 DontInsertCommas // do not automatically insert commas 107 ) 108 109 // Init prepares the scanner s to tokenize the text src by setting the 110 // scanner at the beginning of src. 
The scanner uses the file set file 111 // for position information and it adds line information for each line. 112 // It is ok to re-use the same file when re-scanning the same file as 113 // line information which is already present is ignored. Init causes a 114 // panic if the file size does not match the src size. 115 // 116 // Calls to Scan will invoke the error handler err if they encounter a 117 // syntax error and err is not nil. Also, for each error encountered, 118 // the Scanner field ErrorCount is incremented by one. The mode parameter 119 // determines how comments are handled. 120 // 121 // Note that Init may call err if there is an error in the first character 122 // of the file. 123 func (s *Scanner) Init(file *token.File, src []byte, eh ErrorHandler, mode Mode) { 124 // Explicitly initialize all fields since a scanner may be reused. 125 if file.Size() != len(src) { 126 panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src))) 127 } 128 s.file = file 129 s.dir, _ = filepath.Split(file.Name()) 130 s.src = src 131 s.errh = eh 132 s.mode = mode 133 134 s.ch = ' ' 135 s.offset = 0 136 s.rdOffset = 0 137 s.insertEOL = false 138 s.ErrorCount = 0 139 140 s.next() 141 if s.ch == bom { 142 s.next() // ignore BOM at file beginning 143 } 144 } 145 146 func (s *Scanner) errf(offs int, msg string, args ...interface{}) { 147 if s.errh != nil { 148 s.errh(s.file.Pos(offs, 0), msg, args) 149 } 150 s.ErrorCount++ 151 } 152 153 func (s *Scanner) scanComment() string { 154 // initial '/' already consumed; s.ch == '/' 155 offs := s.offset - 1 // position of initial '/' 156 hasCR := false 157 158 if s.ch == '/' { 159 //-style comment 160 s.next() 161 for s.ch != '\n' && s.ch >= 0 { 162 if s.ch == '\r' { 163 hasCR = true 164 } 165 s.next() 166 } 167 goto exit 168 } 169 170 s.errf(offs, "comment not terminated") 171 172 exit: 173 lit := s.src[offs:s.offset] 174 if hasCR { 175 // TODO: preserve /r/n 176 lit = stripCR(lit) 177 } 178 179 return 
string(lit) 180 } 181 182 func isLetter(ch rune) bool { 183 return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch >= utf8.RuneSelf && unicode.IsLetter(ch) 184 } 185 186 func isDigit(ch rune) bool { 187 // TODO(mpvl): Is this correct? 188 return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch) 189 } 190 191 func (s *Scanner) scanFieldIdentifier() string { 192 offs := s.offset 193 if s.ch == '_' { 194 s.next() 195 } 196 if s.ch == '#' { 197 s.next() 198 // TODO: remove this block to allow #<num> 199 if isDigit(s.ch) { 200 return string(s.src[offs:s.offset]) 201 } 202 } 203 for isLetter(s.ch) || isDigit(s.ch) || s.ch == '_' || s.ch == '$' { 204 s.next() 205 } 206 return string(s.src[offs:s.offset]) 207 } 208 209 func (s *Scanner) scanIdentifier() string { 210 offs := s.offset 211 for isLetter(s.ch) || isDigit(s.ch) || s.ch == '_' || s.ch == '$' { 212 s.next() 213 } 214 return string(s.src[offs:s.offset]) 215 } 216 217 func digitVal(ch rune) int { 218 switch { 219 case '0' <= ch && ch <= '9': 220 return int(ch - '0') 221 case ch == '_': 222 return 0 223 case 'a' <= ch && ch <= 'f': 224 return int(ch - 'a' + 10) 225 case 'A' <= ch && ch <= 'F': 226 return int(ch - 'A' + 10) 227 } 228 return 16 // larger than any legal digit val 229 } 230 231 func (s *Scanner) scanMantissa(base int) { 232 var last rune 233 for digitVal(s.ch) < base { 234 if last == '_' && s.ch == '_' { 235 s.errf(s.offset, "illegal '_' in number") 236 } 237 last = s.ch 238 s.next() 239 } 240 if last == '_' { 241 s.errf(s.offset-1, "illegal '_' in number") 242 } 243 } 244 245 func (s *Scanner) scanNumber(seenDecimalPoint bool) (token.Token, string) { 246 // digitVal(s.ch) < 10 247 offs := s.offset 248 tok := token.INT 249 250 if seenDecimalPoint { 251 offs-- 252 tok = token.FLOAT 253 s.scanMantissa(10) 254 goto exponent 255 } 256 257 if s.ch == '0' { 258 // int or float 259 offs := s.offset 260 s.next() 261 if s.ch == 'x' || s.ch == 'X' { 262 // hexadecimal int 263 s.next() 264 
s.scanMantissa(16) 265 if s.offset-offs <= 2 { 266 // only scanned "0x" or "0X" 267 s.errf(offs, "illegal hexadecimal number") 268 } 269 } else if s.ch == 'b' { 270 // binary int 271 s.next() 272 s.scanMantissa(2) 273 if s.offset-offs <= 2 { 274 // only scanned "0b" 275 s.errf(offs, "illegal binary number") 276 } 277 } else if s.ch == 'o' { 278 // octal int 279 s.next() 280 s.scanMantissa(8) 281 if s.offset-offs <= 2 { 282 // only scanned "0o" 283 s.errf(offs, "illegal octal number") 284 } 285 } else { 286 // 0 or float 287 seenDigits := false 288 if s.ch >= '0' && s.ch <= '9' { 289 seenDigits = true 290 s.scanMantissa(10) 291 } 292 if s.ch == '.' || s.ch == 'e' || s.ch == 'E' { 293 goto fraction 294 } 295 if seenDigits { 296 // integer other than 0 may not start with 0 297 s.errf(offs, "illegal integer number") 298 } 299 } 300 goto exit 301 } 302 303 // decimal int or float 304 s.scanMantissa(10) 305 306 // TODO: allow 3h4s, etc. 307 // switch s.ch { 308 // case 'h', 'm', 's', "ยต"[0], 'u', 'n': 309 // } 310 311 fraction: 312 if s.ch == '.' { 313 if p := s.offset + 1; p < len(s.src) && s.src[p] == '.' { 314 // interpret dot as part of a range. 315 goto exit 316 } 317 tok = token.FLOAT 318 s.next() 319 s.scanMantissa(10) 320 } 321 322 exponent: 323 switch s.ch { 324 case 'K', 'M', 'G', 'T', 'P': 325 tok = token.INT // TODO: Or should we allow this to be a float? 326 s.next() 327 if s.ch == 'i' { 328 s.next() 329 } 330 goto exit 331 } 332 333 if s.ch == 'e' || s.ch == 'E' { 334 tok = token.FLOAT 335 s.next() 336 if s.ch == '-' || s.ch == '+' { 337 s.next() 338 } 339 s.scanMantissa(10) 340 } 341 342 exit: 343 return tok, string(s.src[offs:s.offset]) 344 } 345 346 // scanEscape parses an escape sequence where rune is the accepted 347 // escaped quote. In case of a syntax error, it stops at the offending 348 // character (without consuming it) and returns false. Otherwise 349 // it returns true. 350 // 351 // Must be compliant with https://tools.ietf.org/html/rfc4627. 
352 func (s *Scanner) scanEscape(quote quoteInfo) (ok, interpolation bool) { 353 for range quote.numHash { 354 if s.ch != '#' { 355 return true, false 356 } 357 s.next() 358 } 359 360 offs := s.offset 361 362 var n int 363 var base, max uint32 364 switch s.ch { 365 case '(': 366 return true, true 367 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '/', quote.char: 368 s.next() 369 return true, false 370 case '0', '1', '2', '3', '4', '5', '6', '7': 371 n, base, max = 3, 8, 255 372 case 'x': 373 s.next() 374 n, base, max = 2, 16, 255 375 case 'u': 376 s.next() 377 n, base, max = 4, 16, unicode.MaxRune 378 case 'U': 379 s.next() 380 n, base, max = 8, 16, unicode.MaxRune 381 default: 382 msg := "unknown escape sequence" 383 if s.ch < 0 { 384 msg = "escape sequence not terminated" 385 } 386 s.errf(offs, msg) 387 return false, false 388 } 389 390 var x uint32 391 for n > 0 { 392 d := uint32(digitVal(s.ch)) 393 if d >= base { 394 if s.ch < 0 { 395 s.errf(s.offset, "escape sequence not terminated") 396 } else { 397 s.errf(s.offset, "illegal character %#U in escape sequence", s.ch) 398 } 399 return false, false 400 } 401 x = x*base + d 402 s.next() 403 n-- 404 } 405 406 // TODO: this is valid JSON, so remove, but normalize and report an error 407 // if for unmatched surrogate pairs . 
408 if x > max { 409 s.errf(offs, "escape sequence is invalid Unicode code point") 410 return false, false 411 } 412 413 return true, false 414 } 415 416 func (s *Scanner) scanString(offs int, quote quoteInfo) (token.Token, string) { 417 // ", """, ', or ''' opening already consumed 418 419 tok := token.STRING 420 421 hasCR := false 422 extra := 0 423 for { 424 ch := s.ch 425 if (quote.numChar != 3 && ch == '\n') || ch < 0 { 426 s.errf(offs, "string literal not terminated") 427 lit := s.src[offs:s.offset] 428 if hasCR { 429 lit = stripCR(lit) 430 } 431 return tok, string(lit) 432 } 433 434 s.next() 435 ch, ok := s.consumeStringClose(ch, quote) 436 if ok { 437 break 438 } 439 if ch == '\r' && quote.numChar == 3 { 440 hasCR = true 441 } 442 if ch == '\\' { 443 if _, interpolation := s.scanEscape(quote); interpolation { 444 tok = token.INTERPOLATION 445 extra = 1 446 s.quoteStack = append(s.quoteStack, quote) 447 break 448 } 449 } 450 } 451 lit := s.src[offs : s.offset+extra] 452 if hasCR { 453 lit = stripCR(lit) 454 } 455 return tok, string(lit) 456 } 457 458 func (s *Scanner) consumeQuotes(quote rune, max int) (next rune, n int) { 459 for ; n < max; n++ { 460 if s.ch != quote { 461 return s.ch, n 462 } 463 s.next() 464 } 465 return s.ch, n 466 } 467 468 func (s *Scanner) consumeStringClose(ch rune, quote quoteInfo) (next rune, atEnd bool) { 469 if quote.char != ch { 470 return ch, false 471 } 472 numChar := quote.numChar 473 n := numChar + quote.numHash 474 want := quote.char 475 for i := 1; i < n; i++ { 476 if i == numChar { 477 want = '#' 478 } 479 if want != s.ch { 480 return ch, false 481 } 482 ch = s.ch 483 s.next() 484 } 485 return s.ch, true 486 } 487 488 func (s *Scanner) scanHashes(maxHash int) int { 489 for i := range maxHash { 490 if s.ch != '#' { 491 return i 492 } 493 s.next() 494 } 495 return maxHash 496 } 497 498 func stripCR(b []byte) []byte { 499 c := make([]byte, len(b)) 500 i := 0 501 for _, ch := range b { 502 if ch != '\r' { 503 c[i] = ch 504 
i++ 505 } 506 } 507 return c[:i] 508 } 509 510 // scanAttribute scans aa full attribute of the form @foo(str). An attribute 511 // is a lexical entry and as such whitespace is treated as normal characters 512 // within the attribute. 513 func (s *Scanner) scanAttribute() (tok token.Token, lit string) { 514 offs := s.offset - 1 // @ already consumed 515 516 s.scanIdentifier() 517 518 if _, tok, _ := s.Scan(); tok == token.LPAREN { 519 s.scanAttributeTokens(token.RPAREN) 520 } else { 521 s.errf(s.offset, "invalid attribute: expected '('") 522 } 523 return token.ATTRIBUTE, string(s.src[offs:s.offset]) 524 } 525 526 func (s *Scanner) scanAttributeTokens(close token.Token) { 527 for { 528 switch _, tok, _ := s.Scan(); tok { 529 case close: 530 return 531 case token.EOF: 532 s.errf(s.offset, "attribute missing '%s'", close) 533 return 534 535 case token.INTERPOLATION: 536 s.errf(s.offset, "interpolation not allowed in attribute") 537 s.popInterpolation() 538 s.recoverParen(1) 539 case token.LPAREN: 540 s.scanAttributeTokens(token.RPAREN) 541 case token.LBRACE: 542 s.scanAttributeTokens(token.RBRACE) 543 case token.LBRACK: 544 s.scanAttributeTokens(token.RBRACK) 545 case token.RPAREN, token.RBRACK, token.RBRACE: 546 s.errf(s.offset, "unexpected '%s'", tok) 547 } 548 } 549 } 550 551 // recoverParen is an approximate recovery mechanism to recover from invalid 552 // attributes. 553 func (s *Scanner) recoverParen(open int) { 554 for { 555 switch s.ch { 556 case '\n', -1: 557 return 558 case '(': 559 open++ 560 case ')': 561 if open--; open == 0 { 562 return 563 } 564 } 565 s.next() 566 } 567 } 568 569 func (s *Scanner) skipWhitespace(inc int) { 570 for { 571 switch s.ch { 572 case ' ', '\t': 573 s.spacesSinceLast += inc 574 case '\n': 575 s.linesSinceLast += inc 576 if s.insertEOL { 577 return 578 } 579 case '\r': 580 default: 581 return 582 } 583 s.next() 584 } 585 } 586 587 // Helper functions for scanning multi-byte tokens such as >> += >>= . 
588 // Different routines recognize different length tok_i based on matches 589 // of ch_i. If a token ends in '=', the result is tok1 or tok3 590 // respectively. Otherwise, the result is tok0 if there was no other 591 // matching character, or tok2 if the matching character was ch2. 592 593 func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token { 594 if s.ch == '=' { 595 s.next() 596 return tok1 597 } 598 return tok0 599 } 600 601 func (s *Scanner) popInterpolation() quoteInfo { 602 quote := s.quoteStack[len(s.quoteStack)-1] 603 s.quoteStack = s.quoteStack[:len(s.quoteStack)-1] 604 return quote 605 } 606 607 // ResumeInterpolation resumes scanning of a string interpolation. 608 func (s *Scanner) ResumeInterpolation() string { 609 quote := s.popInterpolation() 610 _, str := s.scanString(s.offset-1, quote) 611 return str 612 } 613 614 // Offset returns the current position offset. 615 func (s *Scanner) Offset() int { 616 return s.offset 617 } 618 619 // Scan scans the next token and returns the token position, the token, 620 // and its literal string if applicable. The source end is indicated by 621 // EOF. 622 // 623 // If the returned token is a literal (IDENT, INT, FLOAT, 624 // IMAG, CHAR, STRING) or COMMENT, the literal string 625 // has the corresponding value. 626 // 627 // If the returned token is a keyword, the literal string is the keyword. 628 // 629 // If the returned token is Comma, the corresponding 630 // literal string is "," if the comma was present in the source, 631 // and "\n" if the semicolon was inserted because of a newline or 632 // at EOF. 633 // 634 // If the returned token is ILLEGAL, the literal string is the 635 // offending character. 636 // 637 // In all other cases, Scan returns an empty literal string. 638 // 639 // For more tolerant parsing, Scan will return a valid token if 640 // possible even if a syntax error was encountered. 
// Thus, even
// if the resulting token sequence contains no illegal tokens,
// a client may not assume that no error occurred. Instead it
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
// Scan adds line information to the file added to the file
// set with Init. Token positions are relative to that file
// and thus relative to the file set.
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
	s.skipWhitespace(1)

	// Classify the whitespace that precedes the token: more than one
	// newline, exactly one newline, blanks only, or nothing at all.
	var rel token.RelPos
	switch {
	case s.linesSinceLast > 1:
		rel = token.NewSection
	case s.linesSinceLast == 1:
		rel = token.Newline
	case s.spacesSinceLast > 0:
		rel = token.Blank
	default:
		rel = token.NoSpace
	}
	// current token start
	offset := s.offset
	pos = s.file.Pos(offset, rel)

	// determine token value
	//
	// insertEOL records whether a newline directly after this token must be
	// turned into a comma; it is copied to s.insertEOL at the end.
	insertEOL := false
	var quote quoteInfo
	switch ch := s.ch; {
	case '0' <= ch && ch <= '9':
		insertEOL = true
		tok, lit = s.scanNumber(false)
	case isLetter(ch), ch == '$', ch == '#':
		lit = s.scanFieldIdentifier()
		if len(lit) > 1 {
			// keywords are longer than one letter - avoid lookup otherwise
			tok = token.Lookup(lit)
			insertEOL = true
			break
		}
		if ch != '#' || (s.ch != '\'' && s.ch != '"' && s.ch != '#') {
			tok = token.IDENT
			insertEOL = true
			break
		}
		// A lone '#' directly followed by a quote or another '#' starts a
		// hash-delimited string. One hash has been consumed; continue with
		// the character after it in the generic branch below.
		quote.numHash = 1
		ch = s.ch
		fallthrough
	default:
		s.next() // always make progress
		switch ch {
		case -1:
			if s.insertEOL {
				s.insertEOL = false // EOF consumed
				return s.file.Pos(offset, token.Elided), token.COMMA, "\n"
			}
			tok = token.EOF
		case '_':
			if s.ch == '|' {
				// Unconditionally require this to be followed by another
				// underscore to avoid needing an extra lookahead.
				// Note that `_|x` is always equal to _.
				s.next()
				if s.ch != '_' {
					s.errf(s.file.Offset(pos), "illegal token '_|'; expected '_'")
					insertEOL = s.insertEOL // preserve insertEOL info
					tok = token.ILLEGAL
					lit = "_|"
					break
				}
				s.next()
				tok = token.BOTTOM
				lit = "_|_"
			} else {
				tok = token.IDENT
				lit = "_" + s.scanFieldIdentifier()
			}
			insertEOL = true

		case '\n':
			// we only reach here if s.insertEOL was
			// set in the first place and exited early
			// from s.skipWhitespace()
			s.insertEOL = false // newline consumed
			p := s.file.Pos(offset, token.Elided)
			s.skipWhitespace(1)
			// Don't elide comma before a ',' or ':' to ensure JSON
			// conformance. Note that cue fmt should immediately undo those.
			if s.ch == ',' || s.ch == ':' {
				return s.Scan()
			}
			return p, token.COMMA, "\n"

		case '#':
			// Count the remaining hashes of the opening delimiter.
			for quote.numHash++; s.ch == '#'; quote.numHash++ {
				s.next()
			}
			ch = s.ch
			if ch != '\'' && ch != '"' {
				// Hashes that are not followed by a quote; tok keeps the
				// value it had on entry.
				break
			}
			s.next()
			fallthrough
		case '"', '\'':
			insertEOL = true
			quote.char = ch
			quote.numChar = 1
			// Start of the literal: the opening quote plus any hashes.
			offs := s.offset - 1 - quote.numHash
			// n is the number of additional quote characters that follow
			// the opening one.
			switch _, n := s.consumeQuotes(ch, 2); n {
			case 0:
				quote.numChar = 1
				tok, lit = s.scanString(offs, quote)
			case 1:
				// When the string is surrounded by hashes,
				// a single leading quote is OK (and part of the string)
				// e.g. #""hello""#
				// unless it's succeeded by the correct number of terminating
				// hash characters
				// e.g. ##""##
				if n := s.scanHashes(quote.numHash); n == quote.numHash {
					// It's the empty string.
					tok, lit = token.STRING, string(s.src[offs:s.offset])
				} else {
					tok, lit = s.scanString(offs, quote)
				}
			case 2:
				// Three quotes open a multiline string, which must be
				// followed directly by a newline (optionally preceded by
				// a carriage return).
				quote.numChar = 3
				switch s.ch {
				case '\n':
					s.next()
					tok, lit = s.scanString(offs, quote)
				case '\r':
					s.next()
					if s.ch == '\n' {
						s.next()
						tok, lit = s.scanString(offs, quote)
						break
					}
					fallthrough
				default:
					s.errf(offs, "expected newline after multiline quote %s",
						s.src[offs:s.offset])
					tok, lit = token.STRING, string(s.src[offs:s.offset])
				}
			}
		case '@':
			insertEOL = true
			tok, lit = s.scanAttribute()
		case ':':
			tok = token.COLON
		case ';':
			tok = token.SEMICOLON
			insertEOL = true
		case '?':
			tok = token.OPTION
			insertEOL = true
		case '.':
			if '0' <= s.ch && s.ch <= '9' {
				insertEOL = true
				tok, lit = s.scanNumber(true)
			} else if s.ch == '.' {
				s.next()
				if s.ch == '.' {
					s.next()
					tok = token.ELLIPSIS
					insertEOL = true
				} else {
					// Exactly two dots: an error is reported and tok keeps
					// the value it had on entry.
					s.errf(s.file.Offset(pos), "illegal token '..'; expected '.'")
				}
			} else {
				tok = token.PERIOD
			}
		case ',':
			tok = token.COMMA
			lit = ","
		case '(':
			tok = token.LPAREN
		case ')':
			insertEOL = true
			tok = token.RPAREN
		case '[':
			tok = token.LBRACK
		case ']':
			insertEOL = true
			tok = token.RBRACK
		case '{':
			tok = token.LBRACE
		case '}':
			insertEOL = true
			tok = token.RBRACE
		case '+':
			tok = token.ADD // Consider ++ for list concatenate.
		case '-':
			tok = token.SUB
		case '*':
			tok = token.MUL
		case '/':
			if s.ch == '/' {
				// comment
				if s.insertEOL {
					// A comma is due before the comment: rewind to the
					// first '/' so that the comment is scanned again by
					// the next call, and return the comma now.
					// reset position to the beginning of the comment
					s.ch = '/'
					s.offset = s.file.Offset(pos)
					s.rdOffset = s.offset + 1
					s.insertEOL = false // newline consumed
					return s.file.Pos(offset, token.Elided), token.COMMA, "\n"
				}
				comment := s.scanComment()
				if s.mode&ScanComments == 0 {
					// skip comment
					s.insertEOL = false // newline consumed
					goto scanAgain
				}
				tok = token.COMMENT
				lit = comment
			} else {
				tok = token.QUO
			}
		// We no longer use %, but seems like a useful token to use for
		// something else at some point.
		// case '%':
		case '<':
			if s.ch == '-' {
				s.next()
				tok = token.ARROW
			} else {
				tok = s.switch2(token.LSS, token.LEQ)
			}
		case '>':
			tok = s.switch2(token.GTR, token.GEQ)
		case '=':
			if s.ch == '~' {
				s.next()
				tok = token.MAT
			} else {
				tok = s.switch2(token.BIND, token.EQL)
			}
		case '!':
			if s.ch == '~' {
				s.next()
				tok = token.NMAT
			} else {
				tok = s.switch2(token.NOT, token.NEQ)
			}
		case '&':
			switch s.ch {
			case '&':
				s.next()
				tok = token.LAND
			default:
				tok = token.AND
			}
		case '|':
			if s.ch == '|' {
				s.next()
				tok = token.LOR
			} else {
				tok = token.OR
			}
		default:
			// next reports unexpected BOMs - don't repeat
			if ch != bom {
				s.errf(s.file.Offset(pos), "illegal character %#U", ch)
			}
			insertEOL = s.insertEOL // preserve insertEOL info
			tok = token.ILLEGAL
			lit = string(ch)
		}
	}
	// With DontInsertCommas set, s.insertEOL is never raised here.
	if s.mode&DontInsertCommas == 0 {
		s.insertEOL = insertEOL
	}

	// A token is being returned, so whitespace counting starts afresh.
	s.linesSinceLast = 0
	s.spacesSinceLast = 0
	return
}