github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/text/scanner/scanner_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package scanner 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "strings" 12 "testing" 13 "unicode/utf8" 14 ) 15 16 // A StringReader delivers its data one string segment at a time via Read. 17 type StringReader struct { 18 data []string 19 step int 20 } 21 22 func (r *StringReader) Read(p []byte) (n int, err error) { 23 if r.step < len(r.data) { 24 s := r.data[r.step] 25 n = copy(p, s) 26 r.step++ 27 } else { 28 err = io.EOF 29 } 30 return 31 } 32 33 func readRuneSegments(t *testing.T, segments []string) { 34 got := "" 35 want := strings.Join(segments, "") 36 s := new(Scanner).Init(&StringReader{data: segments}) 37 for { 38 ch := s.Next() 39 if ch == EOF { 40 break 41 } 42 got += string(ch) 43 } 44 if got != want { 45 t.Errorf("segments=%v got=%s want=%s", segments, got, want) 46 } 47 } 48 49 var segmentList = [][]string{ 50 {}, 51 {""}, 52 {"日", "本語"}, 53 {"\u65e5", "\u672c", "\u8a9e"}, 54 {"\U000065e5", " ", "\U0000672c", "\U00008a9e"}, 55 {"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"}, 56 {"Hello", ", ", "World", "!"}, 57 {"Hello", ", ", "", "World", "!"}, 58 } 59 60 func TestNext(t *testing.T) { 61 for _, s := range segmentList { 62 readRuneSegments(t, s) 63 } 64 } 65 66 type token struct { 67 tok rune 68 text string 69 } 70 71 var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" 72 73 var tokenList = []token{ 74 {Comment, "// line comments"}, 75 {Comment, "//"}, 76 {Comment, "////"}, 77 {Comment, "// comment"}, 78 {Comment, "// /* comment */"}, 79 {Comment, "// // comment //"}, 80 {Comment, "//" + f100}, 81 82 {Comment, "// general comments"}, 83 {Comment, "/**/"}, 84 {Comment, "/***/"}, 85 {Comment, "/* comment */"}, 86 {Comment, "/* // comment */"}, 87 {Comment, "/* /* comment */"}, 88 {Comment, "/*\n comment\n*/"}, 89 {Comment, "/*" + f100 + "*/"}, 90 91 {Comment, "// identifiers"}, 92 {Ident, "a"}, 93 {Ident, "a0"}, 94 {Ident, "foobar"}, 95 {Ident, "abc123"}, 96 {Ident, "LGTM"}, 97 {Ident, "_"}, 98 {Ident, "_abc123"}, 99 {Ident, "abc123_"}, 100 {Ident, "_abc_123_"}, 101 {Ident, "_äöü"}, 102 {Ident, "_本"}, 103 {Ident, "äöü"}, 104 {Ident, "本"}, 105 {Ident, "a۰۱۸"}, 106 {Ident, "foo६४"}, 107 {Ident, "bar9876"}, 108 {Ident, f100}, 109 110 {Comment, "// decimal ints"}, 111 {Int, "0"}, 112 {Int, "1"}, 113 {Int, "9"}, 114 {Int, "42"}, 115 {Int, "1234567890"}, 116 117 {Comment, "// octal ints"}, 118 {Int, "00"}, 119 {Int, "01"}, 120 {Int, "07"}, 121 {Int, "042"}, 122 {Int, "01234567"}, 123 124 {Comment, "// hexadecimal ints"}, 125 {Int, "0x0"}, 126 {Int, "0x1"}, 127 {Int, "0xf"}, 128 {Int, "0x42"}, 129 {Int, "0x123456789abcDEF"}, 130 {Int, "0x" + f100}, 131 {Int, "0X0"}, 132 {Int, "0X1"}, 133 {Int, "0XF"}, 134 {Int, "0X42"}, 135 {Int, "0X123456789abcDEF"}, 136 {Int, "0X" + f100}, 137 138 {Comment, "// floats"}, 139 {Float, "0."}, 140 {Float, "1."}, 141 {Float, "42."}, 142 {Float, "01234567890."}, 143 {Float, ".0"}, 144 {Float, ".1"}, 145 {Float, ".42"}, 146 {Float, ".0123456789"}, 147 {Float, "0.0"}, 148 {Float, "1.0"}, 149 {Float, "42.0"}, 150 {Float, "01234567890.0"}, 151 {Float, "0e0"}, 152 {Float, "1e0"}, 153 {Float, "42e0"}, 154 {Float, "01234567890e0"}, 155 {Float, "0E0"}, 156 {Float, "1E0"}, 157 {Float, "42E0"}, 158 {Float, "01234567890E0"}, 159 {Float, "0e+10"}, 160 {Float, "1e-10"}, 161 {Float, "42e+10"}, 162 {Float, "01234567890e-10"}, 163 {Float, "0E+10"}, 164 {Float, "1E-10"}, 165 {Float, "42E+10"}, 166 {Float, "01234567890E-10"}, 167 168 {Comment, "// chars"}, 169 {Char, `' '`}, 170 {Char, `'a'`}, 171 {Char, `'本'`}, 172 {Char, `'\a'`}, 173 {Char, `'\b'`}, 174 {Char, `'\f'`}, 175 {Char, `'\n'`}, 176 {Char, `'\r'`}, 177 {Char, `'\t'`}, 178 {Char, `'\v'`}, 179 {Char, `'\''`}, 180 {Char, `'\000'`}, 181 {Char, `'\777'`}, 182 {Char, `'\x00'`}, 183 {Char, `'\xff'`}, 184 {Char, `'\u0000'`}, 185 {Char, `'\ufA16'`}, 186 {Char, `'\U00000000'`}, 187 {Char, `'\U0000ffAB'`}, 188 189 {Comment, "// strings"}, 190 {String, `" "`}, 191 {String, `"a"`}, 192 {String, `"本"`}, 193 {String, `"\a"`}, 194 {String, `"\b"`}, 195 {String, `"\f"`}, 196 {String, `"\n"`}, 197 {String, `"\r"`}, 198 {String, `"\t"`}, 199 {String, `"\v"`}, 200 {String, `"\""`}, 201 {String, `"\000"`}, 202 {String, `"\777"`}, 203 {String, `"\x00"`}, 204 {String, `"\xff"`}, 205 {String, `"\u0000"`}, 206 {String, `"\ufA16"`}, 207 {String, `"\U00000000"`}, 208 {String, `"\U0000ffAB"`}, 209 {String, `"` + f100 + `"`}, 210 211 {Comment, "// raw strings"}, 212 {RawString, "``"}, 213 {RawString, "`\\`"}, 214 {RawString, "`" + "\n\n/* foobar */\n\n" + "`"}, 215 {RawString, "`" + f100 + "`"}, 216 217 {Comment, "// individual characters"}, 218 // NUL character is not allowed 219 {'\x01', "\x01"}, 220 {' ' - 1, string(' ' - 1)}, 221 {'+', "+"}, 222 {'/', "/"}, 223 {'.', "."}, 224 {'~', "~"}, 225 {'(', "("}, 226 } 227 228 func makeSource(pattern string) *bytes.Buffer { 229 var buf bytes.Buffer 230 for _, k := range tokenList { 231 fmt.Fprintf(&buf, pattern, k.text) 232 } 233 return &buf 234 } 235 236 func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) { 237 if got != want { 238 t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text) 239 } 240 if s.Line != line { 241 t.Errorf("line = %d, want %d for %q", s.Line, line, text) 242 } 243 stext := s.TokenText() 244 if stext != text { 245 t.Errorf("text = %q, want %q", stext, text) 246 } else { 247 // check idempotency of TokenText() call 248 stext = s.TokenText() 249 if stext != text { 250 t.Errorf("text = %q, want %q (idempotency check)", stext, text) 251 } 252 } 253 } 254 255 func checkTokErr(t *testing.T, s *Scanner, line int, want rune, text string) { 256 prevCount := s.ErrorCount 257 checkTok(t, s, line, s.Scan(), want, text) 258 if s.ErrorCount != prevCount+1 { 259 t.Fatalf("want error for %q", text) 260 } 261 } 262 263 func countNewlines(s string) int { 264 n := 0 265 for _, ch := range s { 266 if ch == '\n' { 267 n++ 268 } 269 } 270 return n 271 } 272 273 func testScan(t *testing.T, mode uint) { 274 s := new(Scanner).Init(makeSource(" \t%s\n")) 275 s.Mode = mode 276 tok := s.Scan() 277 line := 1 278 for _, k := range tokenList { 279 if mode&SkipComments == 0 || k.tok != Comment { 280 checkTok(t, s, line, tok, k.tok, k.text) 281 tok = s.Scan() 282 } 283 line += countNewlines(k.text) + 1 // each token is on a new line 284 } 285 checkTok(t, s, line, tok, EOF, "") 286 } 287 288 func TestScan(t *testing.T) { 289 testScan(t, GoTokens) 290 testScan(t, GoTokens&^SkipComments) 291 } 292 293 func TestInvalidExponent(t *testing.T) { 294 const src = "1.5e 1.5E 1e+ 1e- 1.5z" 295 s := new(Scanner).Init(strings.NewReader(src)) 296 s.Error = func(s *Scanner, msg string) { 297 const want = "exponent has no digits" 298 if msg != want { 299 t.Errorf("%s: got error %q; want %q", s.TokenText(), msg, want) 300 } 301 } 302 checkTokErr(t, s, 1, Float, "1.5e") 303 checkTokErr(t, s, 1, Float, "1.5E") 304 checkTokErr(t, s, 1, Float, "1e+") 305 checkTokErr(t, s, 1, Float, "1e-") 306 checkTok(t, s, 1, s.Scan(), Float, "1.5") 307 checkTok(t, s, 1, s.Scan(), Ident, "z") 308 checkTok(t, s, 1, s.Scan(), EOF, "") 309 if s.ErrorCount != 4 { 310 t.Errorf("%d errors, want 4", s.ErrorCount) 311 } 312 } 313 314 func TestPosition(t *testing.T) { 315 src := makeSource("\t\t\t\t%s\n") 316 s := new(Scanner).Init(src) 317 s.Mode = GoTokens &^ SkipComments 318 s.Scan() 319 pos := Position{"", 4, 1, 5} 320 for _, k := range tokenList { 321 if s.Offset != pos.Offset { 322 t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text) 323 } 324 if s.Line != pos.Line { 325 t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text) 326 } 327 if s.Column != pos.Column { 328 t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text) 329 } 330 pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline 331 pos.Line += countNewlines(k.text) + 1 // each token is on a new line 332 s.Scan() 333 } 334 // make sure there were no token-internal errors reported by scanner 335 if s.ErrorCount != 0 { 336 t.Errorf("%d errors", s.ErrorCount) 337 } 338 } 339 340 func TestScanZeroMode(t *testing.T) { 341 src := makeSource("%s\n") 342 str := src.String() 343 s := new(Scanner).Init(src) 344 s.Mode = 0 // don't recognize any token classes 345 s.Whitespace = 0 // don't skip any whitespace 346 tok := s.Scan() 347 for i, ch := range str { 348 if tok != ch { 349 t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch)) 350 } 351 tok = s.Scan() 352 } 353 if tok != EOF { 354 t.Fatalf("tok = %s, want EOF", TokenString(tok)) 355 } 356 if s.ErrorCount != 0 { 357 t.Errorf("%d errors", s.ErrorCount) 358 } 359 } 360 361 func testScanSelectedMode(t *testing.T, mode uint, class rune) { 362 src := makeSource("%s\n") 363 s := new(Scanner).Init(src) 364 s.Mode = mode 365 tok := s.Scan() 366 for tok != EOF { 367 if tok < 0 && tok != class { 368 t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class)) 369 } 370 tok = s.Scan() 371 } 372 if s.ErrorCount != 0 { 373 t.Errorf("%d errors", s.ErrorCount) 374 } 375 } 376 377 func TestScanSelectedMask(t *testing.T) { 378 testScanSelectedMode(t, 0, 0) 379 testScanSelectedMode(t, ScanIdents, Ident) 380 // Don't test ScanInts and ScanNumbers since some parts of 381 // the floats in the source look like (invalid) octal ints 382 // and ScanNumbers may return either Int or Float. 383 testScanSelectedMode(t, ScanChars, Char) 384 testScanSelectedMode(t, ScanStrings, String) 385 testScanSelectedMode(t, SkipComments, 0) 386 testScanSelectedMode(t, ScanComments, Comment) 387 } 388 389 func TestScanCustomIdent(t *testing.T) { 390 const src = "faab12345 a12b123 a12 3b" 391 s := new(Scanner).Init(strings.NewReader(src)) 392 // ident = ( 'a' | 'b' ) { digit } . 393 // digit = '0' .. '3' . 394 // with a maximum length of 4 395 s.IsIdentRune = func(ch rune, i int) bool { 396 return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3' 397 } 398 checkTok(t, s, 1, s.Scan(), 'f', "f") 399 checkTok(t, s, 1, s.Scan(), Ident, "a") 400 checkTok(t, s, 1, s.Scan(), Ident, "a") 401 checkTok(t, s, 1, s.Scan(), Ident, "b123") 402 checkTok(t, s, 1, s.Scan(), Int, "45") 403 checkTok(t, s, 1, s.Scan(), Ident, "a12") 404 checkTok(t, s, 1, s.Scan(), Ident, "b123") 405 checkTok(t, s, 1, s.Scan(), Ident, "a12") 406 checkTok(t, s, 1, s.Scan(), Int, "3") 407 checkTok(t, s, 1, s.Scan(), Ident, "b") 408 checkTok(t, s, 1, s.Scan(), EOF, "") 409 } 410 411 func TestScanNext(t *testing.T) { 412 const BOM = '\uFEFF' 413 BOMs := string(BOM) 414 s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof")) 415 checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored 416 checkTok(t, s, 1, s.Scan(), Ident, "a") 417 checkTok(t, s, 1, s.Scan(), '=', "=") 418 checkTok(t, s, 0, s.Next(), '=', "") 419 checkTok(t, s, 0, s.Next(), ' ', "") 420 checkTok(t, s, 0, s.Next(), 'b', "") 421 checkTok(t, s, 1, s.Scan(), Ident, "cd") 422 checkTok(t, s, 1, s.Scan(), '{', "{") 423 checkTok(t, s, 2, s.Scan(), Ident, "a") 424 checkTok(t, s, 2, s.Scan(), '+', "+") 425 checkTok(t, s, 0, s.Next(), '=', "") 426 checkTok(t, s, 2, s.Scan(), Ident, "c") 427 checkTok(t, s, 3, s.Scan(), '}', "}") 428 checkTok(t, s, 3, s.Scan(), BOM, BOMs) 429 checkTok(t, s, 3, s.Scan(), -1, "") 430 if s.ErrorCount != 0 { 431 t.Errorf("%d errors", s.ErrorCount) 432 } 433 } 434 435 func TestScanWhitespace(t *testing.T) { 436 var buf bytes.Buffer 437 var ws uint64 438 // start at 1, NUL character is not allowed 439 for ch := byte(1); ch < ' '; ch++ { 440 buf.WriteByte(ch) 441 ws |= 1 << ch 442 } 443 const orig = 'x' 444 buf.WriteByte(orig) 445 446 s := new(Scanner).Init(&buf) 447 s.Mode = 0 448 s.Whitespace = ws 449 tok := s.Scan() 450 if tok != orig { 451 t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig)) 452 } 453 } 454 455 func testError(t *testing.T, src, pos, msg string, tok rune) { 456 s := new(Scanner).Init(strings.NewReader(src)) 457 errorCalled := false 458 s.Error = func(s *Scanner, m string) { 459 if !errorCalled { 460 // only look at first error 461 if p := s.Pos().String(); p != pos { 462 t.Errorf("pos = %q, want %q for %q", p, pos, src) 463 } 464 if m != msg { 465 t.Errorf("msg = %q, want %q for %q", m, msg, src) 466 } 467 errorCalled = true 468 } 469 } 470 tk := s.Scan() 471 if tk != tok { 472 t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src) 473 } 474 if !errorCalled { 475 t.Errorf("error handler not called for %q", src) 476 } 477 if s.ErrorCount == 0 { 478 t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src) 479 } 480 } 481 482 func TestError(t *testing.T) { 483 testError(t, "\x00", "<input>:1:1", "invalid character NUL", 0) 484 testError(t, "\x80", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError) 485 testError(t, "\xff", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError) 486 487 testError(t, "a\x00", "<input>:1:2", "invalid character NUL", Ident) 488 testError(t, "ab\x80", "<input>:1:3", "invalid UTF-8 encoding", Ident) 489 testError(t, "abc\xff", "<input>:1:4", "invalid UTF-8 encoding", Ident) 490 491 testError(t, `"a`+"\x00", "<input>:1:3", "invalid character NUL", String) 492 testError(t, `"ab`+"\x80", "<input>:1:4", "invalid UTF-8 encoding", String) 493 testError(t, `"abc`+"\xff", "<input>:1:5", "invalid UTF-8 encoding", String) 494 495 testError(t, "`a"+"\x00", "<input>:1:3", "invalid character NUL", RawString) 496 testError(t, "`ab"+"\x80", "<input>:1:4", "invalid UTF-8 encoding", RawString) 497 testError(t, "`abc"+"\xff", "<input>:1:5", "invalid UTF-8 encoding", RawString) 498 499 testError(t, `'\"'`, "<input>:1:3", "invalid char escape", Char) 500 testError(t, `"\'"`, "<input>:1:3", "invalid char escape", String) 501 502 testError(t, `01238`, "<input>:1:6", "invalid digit '8' in octal literal", Int) 503 testError(t, `01238123`, "<input>:1:9", "invalid digit '8' in octal literal", Int) 504 testError(t, `0x`, "<input>:1:3", "hexadecimal literal has no digits", Int) 505 testError(t, `0xg`, "<input>:1:3", "hexadecimal literal has no digits", Int) 506 testError(t, `'aa'`, "<input>:1:4", "invalid char literal", Char) 507 testError(t, `1.5e`, "<input>:1:5", "exponent has no digits", Float) 508 testError(t, `1.5E`, "<input>:1:5", "exponent has no digits", Float) 509 testError(t, `1.5e+`, "<input>:1:6", "exponent has no digits", Float) 510 testError(t, `1.5e-`, "<input>:1:6", "exponent has no digits", Float) 511 512 testError(t, `'`, "<input>:1:2", "literal not terminated", Char) 513 testError(t, `'`+"\n", "<input>:1:2", "literal not terminated", Char) 514 testError(t, `"abc`, "<input>:1:5", "literal not terminated", String) 515 testError(t, `"abc`+"\n", "<input>:1:5", "literal not terminated", String) 516 testError(t, "`abc\n", "<input>:2:1", "literal not terminated", RawString) 517 testError(t, `/*/`, "<input>:1:4", "comment not terminated", EOF) 518 } 519 520 // An errReader returns (0, err) where err is not io.EOF. 521 type errReader struct{} 522 523 func (errReader) Read(b []byte) (int, error) { 524 return 0, io.ErrNoProgress // some error that is not io.EOF 525 } 526 527 func TestIOError(t *testing.T) { 528 s := new(Scanner).Init(errReader{}) 529 errorCalled := false 530 s.Error = func(s *Scanner, msg string) { 531 if !errorCalled { 532 if want := io.ErrNoProgress.Error(); msg != want { 533 t.Errorf("msg = %q, want %q", msg, want) 534 } 535 errorCalled = true 536 } 537 } 538 tok := s.Scan() 539 if tok != EOF { 540 t.Errorf("tok = %s, want EOF", TokenString(tok)) 541 } 542 if !errorCalled { 543 t.Errorf("error handler not called") 544 } 545 } 546 547 func checkPos(t *testing.T, got, want Position) { 548 if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column { 549 t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d", 550 got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column) 551 } 552 } 553 554 func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) { 555 if ch := s.Next(); ch != char { 556 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char)) 557 } 558 want := Position{Offset: offset, Line: line, Column: column} 559 checkPos(t, s.Pos(), want) 560 } 561 562 func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) { 563 want := Position{Offset: offset, Line: line, Column: column} 564 checkPos(t, s.Pos(), want) 565 if ch := s.Scan(); ch != char { 566 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char)) 567 if string(ch) != s.TokenText() { 568 t.Errorf("tok = %q, want %q", s.TokenText(), string(ch)) 569 } 570 } 571 checkPos(t, s.Position, want) 572 } 573 574 func TestPos(t *testing.T) { 575 // corner case: empty source 576 s := new(Scanner).Init(strings.NewReader("")) 577 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 578 s.Peek() // peek doesn't affect the position 579 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 580 581 // corner case: source with only a newline 582 s = new(Scanner).Init(strings.NewReader("\n")) 583 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 584 checkNextPos(t, s, 1, 2, 1, '\n') 585 // after EOF position doesn't change 586 for i := 10; i > 0; i-- { 587 checkScanPos(t, s, 1, 2, 1, EOF) 588 } 589 if s.ErrorCount != 0 { 590 t.Errorf("%d errors", s.ErrorCount) 591 } 592 593 // corner case: source with only a single character 594 s = new(Scanner).Init(strings.NewReader("本")) 595 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 596 checkNextPos(t, s, 3, 1, 2, '本') 597 // after EOF position doesn't change 598 for i := 10; i > 0; i-- { 599 checkScanPos(t, s, 3, 1, 2, EOF) 600 } 601 if s.ErrorCount != 0 { 602 t.Errorf("%d errors", s.ErrorCount) 603 } 604 605 // positions after calling Next 606 s = new(Scanner).Init(strings.NewReader(" foo६४ \n\n本語\n")) 607 checkNextPos(t, s, 1, 1, 2, ' ') 608 s.Peek() // peek doesn't affect the position 609 checkNextPos(t, s, 2, 1, 3, ' ') 610 checkNextPos(t, s, 3, 1, 4, 'f') 611 checkNextPos(t, s, 4, 1, 5, 'o') 612 checkNextPos(t, s, 5, 1, 6, 'o') 613 checkNextPos(t, s, 8, 1, 7, '६') 614 checkNextPos(t, s, 11, 1, 8, '४') 615 checkNextPos(t, s, 12, 1, 9, ' ') 616 checkNextPos(t, s, 13, 1, 10, ' ') 617 checkNextPos(t, s, 14, 2, 1, '\n') 618 checkNextPos(t, s, 15, 3, 1, '\n') 619 checkNextPos(t, s, 18, 3, 2, '本') 620 checkNextPos(t, s, 21, 3, 3, '語') 621 checkNextPos(t, s, 22, 4, 1, '\n') 622 // after EOF position doesn't change 623 for i := 10; i > 0; i-- { 624 checkScanPos(t, s, 22, 4, 1, EOF) 625 } 626 if s.ErrorCount != 0 { 627 t.Errorf("%d errors", s.ErrorCount) 628 } 629 630 // positions after calling Scan 631 s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx")) 632 s.Mode = 0 633 s.Whitespace = 0 634 checkScanPos(t, s, 0, 1, 1, 'a') 635 s.Peek() // peek doesn't affect the position 636 checkScanPos(t, s, 1, 1, 2, 'b') 637 checkScanPos(t, s, 2, 1, 3, 'c') 638 checkScanPos(t, s, 3, 1, 4, '\n') 639 checkScanPos(t, s, 4, 2, 1, '本') 640 checkScanPos(t, s, 7, 2, 2, '語') 641 checkScanPos(t, s, 10, 2, 3, '\n') 642 checkScanPos(t, s, 11, 3, 1, '\n') 643 checkScanPos(t, s, 12, 4, 1, 'x') 644 // after EOF position doesn't change 645 for i := 10; i > 0; i-- { 646 checkScanPos(t, s, 13, 4, 2, EOF) 647 } 648 if s.ErrorCount != 0 { 649 t.Errorf("%d errors", s.ErrorCount) 650 } 651 } 652 653 type countReader int 654 655 func (r *countReader) Read([]byte) (int, error) { 656 *r++ 657 return 0, io.EOF 658 } 659 660 func TestNextEOFHandling(t *testing.T) { 661 var r countReader 662 663 // corner case: empty source 664 s := new(Scanner).Init(&r) 665 666 tok := s.Next() 667 if tok != EOF { 668 t.Error("1) EOF not reported") 669 } 670 671 tok = s.Peek() 672 if tok != EOF { 673 t.Error("2) EOF not reported") 674 } 675 676 if r != 1 { 677 t.Errorf("scanner called Read %d times, not once", r) 678 } 679 } 680 681 func TestScanEOFHandling(t *testing.T) { 682 var r countReader 683 684 // corner case: empty source 685 s := new(Scanner).Init(&r) 686 687 tok := s.Scan() 688 if tok != EOF { 689 t.Error("1) EOF not reported") 690 } 691 692 tok = s.Peek() 693 if tok != EOF { 694 t.Error("2) EOF not reported") 695 } 696 697 if r != 1 { 698 t.Errorf("scanner called Read %d times, not once", r) 699 } 700 } 701 702 func TestIssue29723(t *testing.T) { 703 s := new(Scanner).Init(strings.NewReader(`x "`)) 704 s.Error = func(s *Scanner, _ string) { 705 got := s.TokenText() // this call shouldn't panic 706 const want = `"` 707 if got != want { 708 t.Errorf("got %q; want %q", got, want) 709 } 710 } 711 for r := s.Scan(); r != EOF; r = s.Scan() { 712 } 713 } 714 715 func TestNumbers(t *testing.T) { 716 for _, test := range []struct { 717 tok rune 718 src, tokens, err string 719 }{ 720 // binaries 721 {Int, "0b0", "0b0", ""}, 722 {Int, "0b1010", "0b1010", ""}, 723 {Int, "0B1110", "0B1110", ""}, 724 725 {Int, "0b", "0b", "binary literal has no digits"}, 726 {Int, "0b0190", "0b0190", "invalid digit '9' in binary literal"}, 727 {Int, "0b01a0", "0b01 a0", ""}, // only accept 0-9 728 729 // binary floats (invalid) 730 {Float, "0b.", "0b.", "invalid radix point in binary literal"}, 731 {Float, "0b.1", "0b.1", "invalid radix point in binary literal"}, 732 {Float, "0b1.0", "0b1.0", "invalid radix point in binary literal"}, 733 {Float, "0b1e10", "0b1e10", "'e' exponent requires decimal mantissa"}, 734 {Float, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"}, 735 736 // octals 737 {Int, "0o0", "0o0", ""}, 738 {Int, "0o1234", "0o1234", ""}, 739 {Int, "0O1234", "0O1234", ""}, 740 741 {Int, "0o", "0o", "octal literal has no digits"}, 742 {Int, "0o8123", "0o8123", "invalid digit '8' in octal literal"}, 743 {Int, "0o1293", "0o1293", "invalid digit '9' in octal literal"}, 744 {Int, "0o12a3", "0o12 a3", ""}, // only accept 0-9 745 746 // octal floats (invalid) 747 {Float, "0o.", "0o.", "invalid radix point in octal literal"}, 748 {Float, "0o.2", "0o.2", "invalid radix point in octal literal"}, 749 {Float, "0o1.2", "0o1.2", "invalid radix point in octal literal"}, 750 {Float, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"}, 751 {Float, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"}, 752 753 // 0-octals 754 {Int, "0", "0", ""}, 755 {Int, "0123", "0123", ""}, 756 757 {Int, "08123", "08123", "invalid digit '8' in octal literal"}, 758 {Int, "01293", "01293", "invalid digit '9' in octal literal"}, 759 {Int, "0F.", "0 F .", ""}, // only accept 0-9 760 {Int, "0123F.", "0123 F .", ""}, 761 {Int, "0123456x", "0123456 x", ""}, 762 763 // decimals 764 {Int, "1", "1", ""}, 765 {Int, "1234", "1234", ""}, 766 767 {Int, "1f", "1 f", ""}, // only accept 0-9 768 769 // decimal floats 770 {Float, "0.", "0.", ""}, 771 {Float, "123.", "123.", ""}, 772 {Float, "0123.", "0123.", ""}, 773 774 {Float, ".0", ".0", ""}, 775 {Float, ".123", ".123", ""}, 776 {Float, ".0123", ".0123", ""}, 777 778 {Float, "0.0", "0.0", ""}, 779 {Float, "123.123", "123.123", ""}, 780 {Float, "0123.0123", "0123.0123", ""}, 781 782 {Float, "0e0", "0e0", ""}, 783 {Float, "123e+0", "123e+0", ""}, 784 {Float, "0123E-1", "0123E-1", ""}, 785 786 {Float, "0.e+1", "0.e+1", ""}, 787 {Float, "123.E-10", "123.E-10", ""}, 788 {Float, "0123.e123", "0123.e123", ""}, 789 790 {Float, ".0e-1", ".0e-1", ""}, 791 {Float, ".123E+10", ".123E+10", ""}, 792 {Float, ".0123E123", ".0123E123", ""}, 793 794 {Float, "0.0e1", "0.0e1", ""}, 795 {Float, "123.123E-10", "123.123E-10", ""}, 796 {Float, "0123.0123e+456", "0123.0123e+456", ""}, 797 798 {Float, "0e", "0e", "exponent has no digits"}, 799 {Float, "0E+", "0E+", "exponent has no digits"}, 800 {Float, "1e+f", "1e+ f", "exponent has no digits"}, 801 {Float, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"}, 802 {Float, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"}, 803 804 // hexadecimals 805 {Int, "0x0", "0x0", ""}, 806 {Int, "0x1234", "0x1234", ""}, 807 {Int, "0xcafef00d", "0xcafef00d", ""}, 808 {Int, "0XCAFEF00D", "0XCAFEF00D", ""}, 809 810 {Int, "0x", "0x", "hexadecimal literal has no digits"}, 811 {Int, "0x1g", "0x1 g", ""}, 812 813 // hexadecimal floats 814 {Float, "0x0p0", "0x0p0", ""}, 815 {Float, "0x12efp-123", "0x12efp-123", ""}, 816 {Float, "0xABCD.p+0", "0xABCD.p+0", ""}, 817 {Float, "0x.0189P-0", "0x.0189P-0", ""}, 818 {Float, "0x1.ffffp+1023", "0x1.ffffp+1023", ""}, 819 820 {Float, "0x.", "0x.", "hexadecimal literal has no digits"}, 821 {Float, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"}, 822 {Float, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"}, 823 {Float, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"}, 824 {Float, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"}, 825 {Float, "0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"}, 826 {Float, "0x0p", "0x0p", "exponent has no digits"}, 827 {Float, "0xeP-", "0xeP-", "exponent has no digits"}, 828 {Float, "0x1234PAB", "0x1234P AB", "exponent has no digits"}, 829 {Float, "0x1.2p1a", "0x1.2p1 a", ""}, 830 831 // separators 832 {Int, "0b_1000_0001", "0b_1000_0001", ""}, 833 {Int, "0o_600", "0o_600", ""}, 834 {Int, "0_466", "0_466", ""}, 835 {Int, "1_000", "1_000", ""}, 836 {Float, "1_000.000_1", "1_000.000_1", ""}, 837 {Int, "0x_f00d", "0x_f00d", ""}, 838 {Float, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""}, 839 840 {Int, "0b__1000", "0b__1000", "'_' must separate successive digits"}, 841 {Int, "0o60___0", "0o60___0", "'_' must separate successive digits"}, 842 {Int, "0466_", "0466_", "'_' must separate successive digits"}, 843 {Float, "1_.", "1_.", "'_' must separate successive digits"}, 844 {Float, "0._1", "0._1", "'_' must separate successive digits"}, 845 {Float, "2.7_e0", "2.7_e0", "'_' must separate successive digits"}, 846 {Int, "0x___0", "0x___0", "'_' must separate successive digits"}, 847 {Float, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"}, 848 } { 849 s := new(Scanner).Init(strings.NewReader(test.src)) 850 var err string 851 s.Error = func(s *Scanner, msg string) { 852 if err == "" { 853 err = msg 854 } 855 } 856 857 for i, want := range strings.Split(test.tokens, " ") { 858 err = "" 859 tok := s.Scan() 860 lit := s.TokenText() 861 if i == 0 { 862 if tok != test.tok { 863 t.Errorf("%q: got token %s; want %s", test.src, TokenString(tok), TokenString(test.tok)) 864 } 865 if err != test.err { 866 t.Errorf("%q: got error %q; want %q", test.src, err, test.err) 867 } 868 } 869 if lit != want { 870 t.Errorf("%q: got literal %q (%s); want %s", test.src, lit, TokenString(tok), want) 871 } 872 } 873 874 // make sure we read all 875 if tok := s.Scan(); tok != EOF { 876 t.Errorf("%q: got %s; want EOF", test.src, TokenString(tok)) 877 } 878 } 879 } 880 881 func TestIssue30320(t *testing.T) { 882 for _, test := range []struct { 883 in, want string 884 mode uint 885 }{ 886 {"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts}, 887 {"foo0/12/0/5.67", "0 12 0 5 67", ScanInts}, 888 {"xxx1e0yyy", "1 0", ScanInts}, 889 {"1_2", "1_2", ScanInts}, 890 {"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts}, 891 {"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats}, 892 } { 893 got := extractInts(test.in, test.mode) 894 if got != test.want { 895 t.Errorf("%q: got %q; want %q", test.in, got, test.want) 896 } 897 } 898 } 899 900 func extractInts(t string, mode uint) (res string) { 901 var s Scanner 902 s.Init(strings.NewReader(t)) 903 s.Mode = mode 904 for { 905 switch tok := s.Scan(); tok { 906 case Int, Float: 907 if len(res) > 0 { 908 res += " " 909 } 910 res += s.TokenText() 911 case EOF: 912 return 913 } 914 } 915 }