github.com/XiaoMi/Gaea@v1.2.5/parser/lexer.go (about) 1 // Copyright 2016 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package parser 15 16 import ( 17 "bytes" 18 "fmt" 19 "strconv" 20 "strings" 21 "unicode" 22 "unicode/utf8" 23 24 "github.com/XiaoMi/Gaea/mysql" 25 ) 26 27 var _ = yyLexer(&Scanner{}) 28 29 // Pos represents the position of a token. 30 type Pos struct { 31 Line int 32 Col int 33 Offset int 34 } 35 36 // Scanner implements the yyLexer interface. 37 type Scanner struct { 38 r reader 39 buf bytes.Buffer 40 41 errs []error 42 warns []error 43 stmtStartPos int 44 45 // For scanning such kind of comment: /*! MySQL-specific code */ or /*+ optimizer hint */ 46 specialComment specialCommentScanner 47 48 sqlMode mysql.SQLMode 49 50 // If the lexer should recognize keywords for window function. 51 // It may break the compatibility when support those keywords, 52 // because some application may already use them as identifiers. 53 supportWindowFunc bool 54 55 // lastScanOffset indicates last offset returned by scan(). 56 // It's used to substring sql in syntax error message. 57 lastScanOffset int 58 } 59 60 type specialCommentScanner interface { 61 scan() (tok int, pos Pos, lit string) 62 } 63 64 type mysqlSpecificCodeScanner struct { 65 *Scanner 66 Pos 67 } 68 69 func (s *mysqlSpecificCodeScanner) scan() (tok int, pos Pos, lit string) { 70 tok, pos, lit = s.Scanner.scan() 71 pos.Line += s.Pos.Line 72 pos.Col += s.Pos.Col 73 pos.Offset += s.Pos.Offset 74 return 75 } 76 77 type optimizerHintScanner struct { 78 *Scanner 79 Pos 80 end bool 81 } 82 83 func (s *optimizerHintScanner) scan() (tok int, pos Pos, lit string) { 84 tok, pos, lit = s.Scanner.scan() 85 pos.Line += s.Pos.Line 86 pos.Col += s.Pos.Col 87 pos.Offset += s.Pos.Offset 88 if tok == 0 { 89 if !s.end { 90 tok = hintEnd 91 s.end = true 92 } 93 } 94 return 95 } 96 97 // Errors returns the errors and warns during a scan. 98 func (s *Scanner) Errors() (warns []error, errs []error) { 99 return s.warns, s.errs 100 } 101 102 // reset resets the sql string to be scanned. 103 func (s *Scanner) reset(sql string) { 104 s.r = reader{s: sql, p: Pos{Line: 1}} 105 s.buf.Reset() 106 s.errs = s.errs[:0] 107 s.warns = s.warns[:0] 108 s.stmtStartPos = 0 109 s.specialComment = nil 110 } 111 112 func (s *Scanner) stmtText() string { 113 endPos := s.r.pos().Offset 114 if s.r.s[endPos-1] == '\n' { 115 endPos = endPos - 1 // trim new line 116 } 117 if s.r.s[s.stmtStartPos] == '\n' { 118 s.stmtStartPos++ 119 } 120 121 text := s.r.s[s.stmtStartPos:endPos] 122 123 s.stmtStartPos = endPos 124 return text 125 } 126 127 // Errorf tells scanner something is wrong. 128 // Scanner satisfies yyLexer interface which need this function. 129 func (s *Scanner) Errorf(format string, a ...interface{}) { 130 str := fmt.Sprintf(format, a...) 131 val := s.r.s[s.lastScanOffset:] 132 var lenStr = "" 133 if len(val) > 2048 { 134 lenStr = "(total length " + strconv.Itoa(len(val)) + ")" 135 val = val[:2048] 136 } 137 err := fmt.Errorf("line %d column %d near \"%s\"%s %s", 138 s.r.p.Line, s.r.p.Col, val, str, lenStr) 139 s.errs = append(s.errs, err) 140 } 141 142 // Lex returns a token and store the token value in v. 143 // Scanner satisfies yyLexer interface. 144 // 0 and invalid are special token id this function would return: 145 // return 0 tells parser that scanner meets EOF, 146 // return invalid tells parser that scanner meets illegal character. 147 func (s *Scanner) Lex(v *yySymType) int { 148 tok, pos, lit := s.scan() 149 s.lastScanOffset = pos.Offset 150 v.offset = pos.Offset 151 v.ident = lit 152 if tok == identifier { 153 tok = handleIdent(v) 154 } 155 if tok == identifier { 156 if tok1 := s.isTokenIdentifier(lit, pos.Offset); tok1 != 0 { 157 tok = tok1 158 } 159 } 160 if s.sqlMode.HasANSIQuotesMode() && 161 tok == stringLit && 162 s.r.s[v.offset] == '"' { 163 tok = identifier 164 } 165 166 if tok == pipes && !(s.sqlMode.HasPipesAsConcatMode()) { 167 return pipesAsOr 168 } 169 170 if tok == not && s.sqlMode.HasHighNotPrecedenceMode() { 171 return not2 172 } 173 174 switch tok { 175 case intLit: 176 return toInt(s, v, lit) 177 case floatLit: 178 return toFloat(s, v, lit) 179 case decLit: 180 return toDecimal(s, v, lit) 181 case hexLit: 182 return toHex(s, v, lit) 183 case bitLit: 184 return toBit(s, v, lit) 185 case singleAtIdentifier, doubleAtIdentifier, cast, extract: 186 v.item = lit 187 return tok 188 case null: 189 v.item = nil 190 case quotedIdentifier: 191 tok = identifier 192 } 193 if tok == unicode.ReplacementChar && s.r.eof() { 194 return 0 195 } 196 return tok 197 } 198 199 // SetSQLMode sets the SQL mode for scanner. 200 func (s *Scanner) SetSQLMode(mode mysql.SQLMode) { 201 s.sqlMode = mode 202 } 203 204 // GetSQLMode return the SQL mode of scanner. 205 func (s *Scanner) GetSQLMode() mysql.SQLMode { 206 return s.sqlMode 207 } 208 209 // EnableWindowFunc controls whether the scanner recognize the keywords of window function. 210 func (s *Scanner) EnableWindowFunc(val bool) { 211 s.supportWindowFunc = val 212 } 213 214 // NewScanner returns a new scanner object. 215 func NewScanner(s string) *Scanner { 216 return &Scanner{r: reader{s: s}} 217 } 218 219 func (s *Scanner) skipWhitespace() rune { 220 return s.r.incAsLongAs(unicode.IsSpace) 221 } 222 223 func (s *Scanner) scan() (tok int, pos Pos, lit string) { 224 if s.specialComment != nil { 225 // Enter specialComment scan mode. 226 // for scanning such kind of comment: /*! MySQL-specific code */ 227 specialComment := s.specialComment 228 tok, pos, lit = specialComment.scan() 229 if tok != 0 { 230 // return the specialComment scan result as the result 231 return 232 } 233 // leave specialComment scan mode after all stream consumed. 234 s.specialComment = nil 235 } 236 237 ch0 := s.r.peek() 238 if unicode.IsSpace(ch0) { 239 ch0 = s.skipWhitespace() 240 } 241 pos = s.r.pos() 242 if s.r.eof() { 243 // when scanner meets EOF, the returned token should be 0, 244 // because 0 is a special token id to remind the parser that stream is end. 245 return 0, pos, "" 246 } 247 248 if !s.r.eof() && isIdentExtend(ch0) { 249 return scanIdentifier(s) 250 } 251 252 // search a trie to get a token. 253 node := &ruleTable 254 for ch0 >= 0 && ch0 <= 255 { 255 if node.childs[ch0] == nil || s.r.eof() { 256 break 257 } 258 node = node.childs[ch0] 259 if node.fn != nil { 260 return node.fn(s) 261 } 262 s.r.inc() 263 ch0 = s.r.peek() 264 } 265 266 tok, lit = node.token, s.r.data(&pos) 267 return 268 } 269 270 func startWithXx(s *Scanner) (tok int, pos Pos, lit string) { 271 pos = s.r.pos() 272 s.r.inc() 273 if s.r.peek() == '\'' { 274 s.r.inc() 275 s.scanHex() 276 if s.r.peek() == '\'' { 277 s.r.inc() 278 tok, lit = hexLit, s.r.data(&pos) 279 } else { 280 tok = unicode.ReplacementChar 281 } 282 return 283 } 284 s.r.incAsLongAs(isIdentChar) 285 tok, lit = identifier, s.r.data(&pos) 286 return 287 } 288 289 func startWithNn(s *Scanner) (tok int, pos Pos, lit string) { 290 tok, pos, lit = scanIdentifier(s) 291 // The National Character Set, N'some text' or n'some test'. 292 // See https://dev.mysql.com/doc/refman/5.7/en/string-literals.html 293 // and https://dev.mysql.com/doc/refman/5.7/en/charset-national.html 294 if lit == "N" || lit == "n" { 295 if s.r.peek() == '\'' { 296 tok = underscoreCS 297 lit = "utf8" 298 } 299 } 300 return 301 } 302 303 func startWithBb(s *Scanner) (tok int, pos Pos, lit string) { 304 pos = s.r.pos() 305 s.r.inc() 306 if s.r.peek() == '\'' { 307 s.r.inc() 308 s.scanBit() 309 if s.r.peek() == '\'' { 310 s.r.inc() 311 tok, lit = bitLit, s.r.data(&pos) 312 } else { 313 tok = unicode.ReplacementChar 314 } 315 return 316 } 317 s.r.incAsLongAs(isIdentChar) 318 tok, lit = identifier, s.r.data(&pos) 319 return 320 } 321 322 func startWithSharp(s *Scanner) (tok int, pos Pos, lit string) { 323 s.r.incAsLongAs(func(ch rune) bool { 324 return ch != '\n' 325 }) 326 return s.scan() 327 } 328 329 func startWithDash(s *Scanner) (tok int, pos Pos, lit string) { 330 pos = s.r.pos() 331 if strings.HasPrefix(s.r.s[pos.Offset:], "--") { 332 remainLen := len(s.r.s[pos.Offset:]) 333 if remainLen == 2 || (remainLen > 2 && unicode.IsSpace(rune(s.r.s[pos.Offset+2]))) { 334 s.r.incAsLongAs(func(ch rune) bool { 335 return ch != '\n' 336 }) 337 return s.scan() 338 } 339 } 340 if strings.HasPrefix(s.r.s[pos.Offset:], "->>") { 341 tok = juss 342 s.r.incN(3) 343 return 344 } 345 if strings.HasPrefix(s.r.s[pos.Offset:], "->") { 346 tok = jss 347 s.r.incN(2) 348 return 349 } 350 tok = int('-') 351 lit = "-" 352 s.r.inc() 353 return 354 } 355 356 func startWithSlash(s *Scanner) (tok int, pos Pos, lit string) { 357 pos = s.r.pos() 358 s.r.inc() 359 ch0 := s.r.peek() 360 if ch0 == '*' { 361 s.r.inc() 362 startWithAsterisk := false 363 for { 364 ch0 = s.r.readByte() 365 if startWithAsterisk && ch0 == '/' { 366 // Meets */, means comment end. 367 break 368 } else if ch0 == '*' { 369 startWithAsterisk = true 370 } else { 371 startWithAsterisk = false 372 } 373 374 if ch0 == unicode.ReplacementChar && s.r.eof() { 375 // unclosed comment 376 s.errs = append(s.errs, ParseErrorWith(s.r.data(&pos), s.r.p.Line)) 377 return 378 } 379 380 } 381 382 comment := s.r.data(&pos) 383 384 // See https://dev.mysql.com/doc/refman/5.7/en/optimizer-hints.html 385 if strings.HasPrefix(comment, "/*+") { 386 begin := sqlOffsetInComment(comment) 387 end := len(comment) - 2 388 sql := comment[begin:end] 389 s.specialComment = &optimizerHintScanner{ 390 Scanner: NewScanner(sql), 391 Pos: Pos{ 392 pos.Line, 393 pos.Col, 394 pos.Offset + begin, 395 }, 396 } 397 398 tok = hintBegin 399 return 400 } 401 402 // See http://dev.mysql.com/doc/refman/5.7/en/comments.html 403 // Convert "/*!VersionNumber MySQL-specific-code */" to "MySQL-specific-code". 404 if strings.HasPrefix(comment, "/*!") { 405 sql := specCodePattern.ReplaceAllStringFunc(comment, TrimComment) 406 s.specialComment = &mysqlSpecificCodeScanner{ 407 Scanner: NewScanner(sql), 408 Pos: Pos{ 409 pos.Line, 410 pos.Col, 411 pos.Offset + sqlOffsetInComment(comment), 412 }, 413 } 414 } 415 416 return s.scan() 417 } 418 tok = int('/') 419 return 420 } 421 422 func sqlOffsetInComment(comment string) int { 423 // find the first SQL token offset in pattern like "/*!40101 mysql specific code */" 424 offset := 0 425 for i := 0; i < len(comment); i++ { 426 if unicode.IsSpace(rune(comment[i])) { 427 offset = i 428 break 429 } 430 } 431 for offset < len(comment) { 432 offset++ 433 if !unicode.IsSpace(rune(comment[offset])) { 434 break 435 } 436 } 437 return offset 438 } 439 440 func startWithAt(s *Scanner) (tok int, pos Pos, lit string) { 441 pos = s.r.pos() 442 s.r.inc() 443 444 tok, lit = scanIdentifierOrString(s) 445 switch tok { 446 case '@': 447 s.r.inc() 448 stream := s.r.s[pos.Offset+2:] 449 var prefix string 450 for _, v := range []string{"global.", "session.", "local."} { 451 if len(v) > len(stream) { 452 continue 453 } 454 if strings.EqualFold(stream[:len(v)], v) { 455 prefix = v 456 s.r.incN(len(v)) 457 break 458 } 459 } 460 tok, lit = scanIdentifierOrString(s) 461 switch tok { 462 case stringLit, quotedIdentifier: 463 tok, lit = doubleAtIdentifier, "@@"+prefix+lit 464 case identifier: 465 tok, lit = doubleAtIdentifier, s.r.data(&pos) 466 } 467 case unicode.ReplacementChar: 468 break 469 default: 470 tok = singleAtIdentifier 471 } 472 473 return 474 } 475 476 func scanIdentifier(s *Scanner) (int, Pos, string) { 477 pos := s.r.pos() 478 s.r.inc() 479 s.r.incAsLongAs(isIdentChar) 480 return identifier, pos, s.r.data(&pos) 481 } 482 483 func scanIdentifierOrString(s *Scanner) (tok int, lit string) { 484 ch1 := s.r.peek() 485 switch ch1 { 486 case '\'', '"': 487 tok, _, lit = startString(s) 488 case '`': 489 tok, _, lit = scanQuotedIdent(s) 490 default: 491 if isUserVarChar(ch1) { 492 pos := s.r.pos() 493 s.r.incAsLongAs(isUserVarChar) 494 tok, lit = identifier, s.r.data(&pos) 495 } else { 496 tok = int(ch1) 497 } 498 } 499 return 500 } 501 502 var ( 503 quotedIdentifier = -identifier 504 ) 505 506 func scanQuotedIdent(s *Scanner) (tok int, pos Pos, lit string) { 507 pos = s.r.pos() 508 s.r.inc() 509 s.buf.Reset() 510 for { 511 ch := s.r.readByte() 512 if ch == unicode.ReplacementChar && s.r.eof() { 513 tok = unicode.ReplacementChar 514 return 515 } 516 if ch == '`' { 517 if s.r.peek() != '`' { 518 // don't return identifier in case that it's interpreted as keyword token later. 519 tok, lit = quotedIdentifier, s.buf.String() 520 return 521 } 522 s.r.inc() 523 } 524 s.buf.WriteRune(ch) 525 } 526 } 527 528 func startString(s *Scanner) (tok int, pos Pos, lit string) { 529 return s.scanString() 530 } 531 532 // lazyBuf is used to avoid allocation if possible. 533 // it has a useBuf field indicates whether bytes.Buffer is necessary. if 534 // useBuf is false, we can avoid calling bytes.Buffer.String(), which 535 // make a copy of data and cause allocation. 536 type lazyBuf struct { 537 useBuf bool 538 r *reader 539 b *bytes.Buffer 540 p *Pos 541 } 542 543 func (mb *lazyBuf) setUseBuf(str string) { 544 if !mb.useBuf { 545 mb.useBuf = true 546 mb.b.Reset() 547 mb.b.WriteString(str) 548 } 549 } 550 551 func (mb *lazyBuf) writeRune(r rune, w int) { 552 if mb.useBuf { 553 if w > 1 { 554 mb.b.WriteRune(r) 555 } else { 556 mb.b.WriteByte(byte(r)) 557 } 558 } 559 } 560 561 func (mb *lazyBuf) data() string { 562 var lit string 563 if mb.useBuf { 564 lit = mb.b.String() 565 } else { 566 lit = mb.r.data(mb.p) 567 lit = lit[1 : len(lit)-1] 568 } 569 return lit 570 } 571 572 func (s *Scanner) scanString() (tok int, pos Pos, lit string) { 573 tok, pos = stringLit, s.r.pos() 574 mb := lazyBuf{false, &s.r, &s.buf, &pos} 575 ending := s.r.readByte() 576 ch0 := s.r.peek() 577 for !s.r.eof() { 578 if ch0 == ending { 579 s.r.inc() 580 if s.r.peek() != ending { 581 lit = mb.data() 582 return 583 } 584 str := mb.r.data(&pos) 585 mb.setUseBuf(str[1 : len(str)-1]) 586 } else if ch0 == '\\' && !s.sqlMode.HasNoBackslashEscapesMode() { 587 mb.setUseBuf(mb.r.data(&pos)[1:]) 588 ch0 = handleEscape(s) 589 } 590 mb.writeRune(ch0, s.r.w) 591 if !s.r.eof() { 592 s.r.inc() 593 ch0 = s.r.peek() 594 } 595 } 596 597 tok = unicode.ReplacementChar 598 return 599 } 600 601 // handleEscape handles the case in scanString when previous char is '\'. 602 func handleEscape(s *Scanner) rune { 603 s.r.inc() 604 ch0 := s.r.peek() 605 /* 606 \" \' \\ \n \0 \b \Z \r \t ==> escape to one char 607 \% \_ ==> preserve both char 608 other ==> remove \ 609 */ 610 switch ch0 { 611 case 'n': 612 ch0 = '\n' 613 case '0': 614 ch0 = 0 615 case 'b': 616 ch0 = 8 617 case 'Z': 618 ch0 = 26 619 case 'r': 620 ch0 = '\r' 621 case 't': 622 ch0 = '\t' 623 case '%', '_': 624 s.buf.WriteByte('\\') 625 } 626 return ch0 627 } 628 629 func startWithNumber(s *Scanner) (tok int, pos Pos, lit string) { 630 pos = s.r.pos() 631 tok = intLit 632 ch0 := s.r.readByte() 633 if ch0 == '0' { 634 tok = intLit 635 ch1 := s.r.peek() 636 switch { 637 case ch1 >= '0' && ch1 <= '7': 638 s.r.inc() 639 s.scanOct() 640 case ch1 == 'x' || ch1 == 'X': 641 s.r.inc() 642 p1 := s.r.pos() 643 s.scanHex() 644 p2 := s.r.pos() 645 // 0x, 0x7fz3 are identifier 646 if p1 == p2 || isDigit(s.r.peek()) { 647 s.r.incAsLongAs(isIdentChar) 648 return identifier, pos, s.r.data(&pos) 649 } 650 tok = hexLit 651 case ch1 == 'b': 652 s.r.inc() 653 p1 := s.r.pos() 654 s.scanBit() 655 p2 := s.r.pos() 656 // 0b, 0b123, 0b1ab are identifier 657 if p1 == p2 || isDigit(s.r.peek()) { 658 s.r.incAsLongAs(isIdentChar) 659 return identifier, pos, s.r.data(&pos) 660 } 661 tok = bitLit 662 case ch1 == '.': 663 return s.scanFloat(&pos) 664 case ch1 == 'B': 665 s.r.incAsLongAs(isIdentChar) 666 return identifier, pos, s.r.data(&pos) 667 } 668 } 669 670 s.scanDigits() 671 ch0 = s.r.peek() 672 if ch0 == '.' || ch0 == 'e' || ch0 == 'E' { 673 return s.scanFloat(&pos) 674 } 675 676 // Identifiers may begin with a digit but unless quoted may not consist solely of digits. 677 if !s.r.eof() && isIdentChar(ch0) { 678 s.r.incAsLongAs(isIdentChar) 679 return identifier, pos, s.r.data(&pos) 680 } 681 lit = s.r.data(&pos) 682 return 683 } 684 685 func startWithDot(s *Scanner) (tok int, pos Pos, lit string) { 686 pos = s.r.pos() 687 s.r.inc() 688 save := s.r.pos() 689 if isDigit(s.r.peek()) { 690 tok, _, lit = s.scanFloat(&pos) 691 if s.r.eof() || !isIdentChar(s.r.peek()) { 692 return 693 } 694 // Fail to parse a float, reset to dot. 695 s.r.p = save 696 } 697 tok, lit = int('.'), "." 698 return 699 } 700 701 func (s *Scanner) scanOct() { 702 s.r.incAsLongAs(func(ch rune) bool { 703 return ch >= '0' && ch <= '7' 704 }) 705 } 706 707 func (s *Scanner) scanHex() { 708 s.r.incAsLongAs(func(ch rune) bool { 709 return ch >= '0' && ch <= '9' || 710 ch >= 'a' && ch <= 'f' || 711 ch >= 'A' && ch <= 'F' 712 }) 713 } 714 715 func (s *Scanner) scanBit() { 716 s.r.incAsLongAs(func(ch rune) bool { 717 return ch == '0' || ch == '1' 718 }) 719 } 720 721 func (s *Scanner) scanFloat(beg *Pos) (tok int, pos Pos, lit string) { 722 s.r.p = *beg 723 // float = D1 . D2 e D3 724 s.scanDigits() 725 ch0 := s.r.peek() 726 if ch0 == '.' { 727 s.r.inc() 728 s.scanDigits() 729 ch0 = s.r.peek() 730 } 731 if ch0 == 'e' || ch0 == 'E' { 732 s.r.inc() 733 ch0 = s.r.peek() 734 if ch0 == '-' || ch0 == '+' || isDigit(ch0) { 735 s.r.inc() 736 s.scanDigits() 737 tok = floatLit 738 } else { 739 // D1 . D2 e XX when XX is not D3, parse the result to an identifier. 740 // 9e9e = 9e9(float) + e(identifier) 741 // 9est = 9est(identifier) 742 s.r.incAsLongAs(isIdentChar) 743 tok = identifier 744 } 745 } else { 746 tok = decLit 747 } 748 pos, lit = *beg, s.r.data(beg) 749 return 750 } 751 752 func (s *Scanner) scanDigits() string { 753 pos := s.r.pos() 754 s.r.incAsLongAs(isDigit) 755 return s.r.data(&pos) 756 } 757 758 type reader struct { 759 s string 760 p Pos 761 w int 762 } 763 764 var eof = Pos{-1, -1, -1} 765 766 func (r *reader) eof() bool { 767 return r.p.Offset >= len(r.s) 768 } 769 770 // peek() peeks a rune from underlying reader. 771 // if reader meets EOF, it will return unicode.ReplacementChar. to distinguish from 772 // the real unicode.ReplacementChar, the caller should call r.eof() again to check. 773 func (r *reader) peek() rune { 774 if r.eof() { 775 return unicode.ReplacementChar 776 } 777 v, w := rune(r.s[r.p.Offset]), 1 778 switch { 779 case v == 0: 780 r.w = w 781 return v // illegal UTF-8 encoding 782 case v >= 0x80: 783 v, w = utf8.DecodeRuneInString(r.s[r.p.Offset:]) 784 if v == utf8.RuneError && w == 1 { 785 v = rune(r.s[r.p.Offset]) // illegal UTF-8 encoding 786 } 787 } 788 r.w = w 789 return v 790 } 791 792 // inc increase the position offset of the reader. 793 // peek must be called before calling inc! 794 func (r *reader) inc() { 795 if r.s[r.p.Offset] == '\n' { 796 r.p.Line++ 797 r.p.Col = 0 798 } 799 r.p.Offset += r.w 800 r.p.Col++ 801 } 802 803 func (r *reader) incN(n int) { 804 for i := 0; i < n; i++ { 805 r.inc() 806 } 807 } 808 809 func (r *reader) readByte() (ch rune) { 810 ch = r.peek() 811 if ch == unicode.ReplacementChar && r.eof() { 812 return 813 } 814 r.inc() 815 return 816 } 817 818 func (r *reader) pos() Pos { 819 return r.p 820 } 821 822 func (r *reader) data(from *Pos) string { 823 return r.s[from.Offset:r.p.Offset] 824 } 825 826 func (r *reader) incAsLongAs(fn func(rune) bool) rune { 827 for { 828 ch := r.peek() 829 if !fn(ch) { 830 return ch 831 } 832 if ch == unicode.ReplacementChar && r.eof() { 833 return 0 834 } 835 r.inc() 836 } 837 }