github.com/tcnksm/go@v0.0.0-20141208075154-439b32936367/src/text/scanner/scanner_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package scanner 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "strings" 12 "testing" 13 "unicode/utf8" 14 ) 15 16 // A StringReader delivers its data one string segment at a time via Read. 17 type StringReader struct { 18 data []string 19 step int 20 } 21 22 func (r *StringReader) Read(p []byte) (n int, err error) { 23 if r.step < len(r.data) { 24 s := r.data[r.step] 25 n = copy(p, s) 26 r.step++ 27 } else { 28 err = io.EOF 29 } 30 return 31 } 32 33 func readRuneSegments(t *testing.T, segments []string) { 34 got := "" 35 want := strings.Join(segments, "") 36 s := new(Scanner).Init(&StringReader{data: segments}) 37 for { 38 ch := s.Next() 39 if ch == EOF { 40 break 41 } 42 got += string(ch) 43 } 44 if got != want { 45 t.Errorf("segments=%v got=%s want=%s", segments, got, want) 46 } 47 } 48 49 var segmentList = [][]string{ 50 {}, 51 {""}, 52 {"日", "本語"}, 53 {"\u65e5", "\u672c", "\u8a9e"}, 54 {"\U000065e5", " ", "\U0000672c", "\U00008a9e"}, 55 {"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"}, 56 {"Hello", ", ", "World", "!"}, 57 {"Hello", ", ", "", "World", "!"}, 58 } 59 60 func TestNext(t *testing.T) { 61 for _, s := range segmentList { 62 readRuneSegments(t, s) 63 } 64 } 65 66 type token struct { 67 tok rune 68 text string 69 } 70 71 var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" 72 73 var tokenList = []token{ 74 {Comment, "// line comments"}, 75 {Comment, "//"}, 76 {Comment, "////"}, 77 {Comment, "// comment"}, 78 {Comment, "// /* comment */"}, 79 {Comment, "// // comment //"}, 80 {Comment, "//" + f100}, 81 82 {Comment, "// general comments"}, 83 {Comment, "/**/"}, 84 {Comment, "/***/"}, 85 {Comment, "/* comment */"}, 86 {Comment, "/* // comment */"}, 87 {Comment, "/* /* comment */"}, 88 {Comment, "/*\n comment\n*/"}, 89 {Comment, "/*" + f100 + "*/"}, 90 91 {Comment, "// identifiers"}, 92 {Ident, "a"}, 93 {Ident, "a0"}, 94 {Ident, "foobar"}, 95 {Ident, "abc123"}, 96 {Ident, "LGTM"}, 97 {Ident, "_"}, 98 {Ident, "_abc123"}, 99 {Ident, "abc123_"}, 100 {Ident, "_abc_123_"}, 101 {Ident, "_äöü"}, 102 {Ident, "_本"}, 103 {Ident, "äöü"}, 104 {Ident, "本"}, 105 {Ident, "a۰۱۸"}, 106 {Ident, "foo६४"}, 107 {Ident, "bar9876"}, 108 {Ident, f100}, 109 110 {Comment, "// decimal ints"}, 111 {Int, "0"}, 112 {Int, "1"}, 113 {Int, "9"}, 114 {Int, "42"}, 115 {Int, "1234567890"}, 116 117 {Comment, "// octal ints"}, 118 {Int, "00"}, 119 {Int, "01"}, 120 {Int, "07"}, 121 {Int, "042"}, 122 {Int, "01234567"}, 123 124 {Comment, "// hexadecimal ints"}, 125 {Int, "0x0"}, 126 {Int, "0x1"}, 127 {Int, "0xf"}, 128 {Int, "0x42"}, 129 {Int, "0x123456789abcDEF"}, 130 {Int, "0x" + f100}, 131 {Int, "0X0"}, 132 {Int, "0X1"}, 133 {Int, "0XF"}, 134 {Int, "0X42"}, 135 {Int, "0X123456789abcDEF"}, 136 {Int, "0X" + f100}, 137 138 {Comment, "// floats"}, 139 {Float, "0."}, 140 {Float, "1."}, 141 {Float, "42."}, 142 {Float, "01234567890."}, 143 {Float, ".0"}, 144 {Float, ".1"}, 145 {Float, ".42"}, 146 {Float, ".0123456789"}, 147 {Float, "0.0"}, 148 {Float, "1.0"}, 149 {Float, "42.0"}, 150 {Float, "01234567890.0"}, 151 {Float, "0e0"}, 152 {Float, "1e0"}, 153 {Float, "42e0"}, 154 {Float, "01234567890e0"}, 155 {Float, "0E0"}, 156 {Float, "1E0"}, 157 {Float, "42E0"}, 158 {Float, "01234567890E0"}, 159 {Float, "0e+10"}, 160 {Float, "1e-10"}, 161 {Float, "42e+10"}, 162 {Float, "01234567890e-10"}, 163 {Float, "0E+10"}, 164 {Float, "1E-10"}, 165 {Float, "42E+10"}, 166 {Float, "01234567890E-10"}, 167 168 {Comment, "// chars"}, 169 {Char, `' '`}, 170 {Char, `'a'`}, 171 {Char, `'本'`}, 172 {Char, `'\a'`}, 173 {Char, `'\b'`}, 174 {Char, `'\f'`}, 175 {Char, `'\n'`}, 176 {Char, `'\r'`}, 177 {Char, `'\t'`}, 178 {Char, `'\v'`}, 179 {Char, `'\''`}, 180 {Char, `'\000'`}, 181 {Char, `'\777'`}, 182 {Char, `'\x00'`}, 183 {Char, `'\xff'`}, 184 {Char, `'\u0000'`}, 185 {Char, `'\ufA16'`}, 186 {Char, `'\U00000000'`}, 187 {Char, `'\U0000ffAB'`}, 188 189 {Comment, "// strings"}, 190 {String, `" "`}, 191 {String, `"a"`}, 192 {String, `"本"`}, 193 {String, `"\a"`}, 194 {String, `"\b"`}, 195 {String, `"\f"`}, 196 {String, `"\n"`}, 197 {String, `"\r"`}, 198 {String, `"\t"`}, 199 {String, `"\v"`}, 200 {String, `"\""`}, 201 {String, `"\000"`}, 202 {String, `"\777"`}, 203 {String, `"\x00"`}, 204 {String, `"\xff"`}, 205 {String, `"\u0000"`}, 206 {String, `"\ufA16"`}, 207 {String, `"\U00000000"`}, 208 {String, `"\U0000ffAB"`}, 209 {String, `"` + f100 + `"`}, 210 211 {Comment, "// raw strings"}, 212 {String, "``"}, 213 {String, "`\\`"}, 214 {String, "`" + "\n\n/* foobar */\n\n" + "`"}, 215 {String, "`" + f100 + "`"}, 216 217 {Comment, "// individual characters"}, 218 // NUL character is not allowed 219 {'\x01', "\x01"}, 220 {' ' - 1, string(' ' - 1)}, 221 {'+', "+"}, 222 {'/', "/"}, 223 {'.', "."}, 224 {'~', "~"}, 225 {'(', "("}, 226 } 227 228 func makeSource(pattern string) *bytes.Buffer { 229 var buf bytes.Buffer 230 for _, k := range tokenList { 231 fmt.Fprintf(&buf, pattern, k.text) 232 } 233 return &buf 234 } 235 236 func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) { 237 if got != want { 238 t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text) 239 } 240 if s.Line != line { 241 t.Errorf("line = %d, want %d for %q", s.Line, line, text) 242 } 243 stext := s.TokenText() 244 if stext != text { 245 t.Errorf("text = %q, want %q", stext, text) 246 } else { 247 // check idempotency of TokenText() call 248 stext = s.TokenText() 249 if stext != text { 250 t.Errorf("text = %q, want %q (idempotency check)", stext, text) 251 } 252 } 253 } 254 255 func countNewlines(s string) int { 256 n := 0 257 for _, ch := range s { 258 if ch == '\n' { 259 n++ 260 } 261 } 262 return n 263 } 264 265 func testScan(t *testing.T, mode uint) { 266 s := new(Scanner).Init(makeSource(" \t%s\n")) 267 s.Mode = mode 268 tok := s.Scan() 269 line := 1 270 for _, k := range tokenList { 271 if mode&SkipComments == 0 || k.tok != Comment { 272 checkTok(t, s, line, tok, k.tok, k.text) 273 tok = s.Scan() 274 } 275 line += countNewlines(k.text) + 1 // each token is on a new line 276 } 277 checkTok(t, s, line, tok, EOF, "") 278 } 279 280 func TestScan(t *testing.T) { 281 testScan(t, GoTokens) 282 testScan(t, GoTokens&^SkipComments) 283 } 284 285 func TestPosition(t *testing.T) { 286 src := makeSource("\t\t\t\t%s\n") 287 s := new(Scanner).Init(src) 288 s.Mode = GoTokens &^ SkipComments 289 s.Scan() 290 pos := Position{"", 4, 1, 5} 291 for _, k := range tokenList { 292 if s.Offset != pos.Offset { 293 t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text) 294 } 295 if s.Line != pos.Line { 296 t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text) 297 } 298 if s.Column != pos.Column { 299 t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text) 300 } 301 pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline 302 pos.Line += countNewlines(k.text) + 1 // each token is on a new line 303 s.Scan() 304 } 305 // make sure there were no token-internal errors reported by scanner 306 if s.ErrorCount != 0 { 307 t.Errorf("%d errors", s.ErrorCount) 308 } 309 } 310 311 func TestScanZeroMode(t *testing.T) { 312 src := makeSource("%s\n") 313 str := src.String() 314 s := new(Scanner).Init(src) 315 s.Mode = 0 // don't recognize any token classes 316 s.Whitespace = 0 // don't skip any whitespace 317 tok := s.Scan() 318 for i, ch := range str { 319 if tok != ch { 320 t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch)) 321 } 322 tok = s.Scan() 323 } 324 if tok != EOF { 325 t.Fatalf("tok = %s, want EOF", TokenString(tok)) 326 } 327 if s.ErrorCount != 0 { 328 t.Errorf("%d errors", s.ErrorCount) 329 } 330 } 331 332 func testScanSelectedMode(t *testing.T, mode uint, class rune) { 333 src := makeSource("%s\n") 334 s := new(Scanner).Init(src) 335 s.Mode = mode 336 tok := s.Scan() 337 for tok != EOF { 338 if tok < 0 && tok != class { 339 t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class)) 340 } 341 tok = s.Scan() 342 } 343 if s.ErrorCount != 0 { 344 t.Errorf("%d errors", s.ErrorCount) 345 } 346 } 347 348 func TestScanSelectedMask(t *testing.T) { 349 testScanSelectedMode(t, 0, 0) 350 testScanSelectedMode(t, ScanIdents, Ident) 351 // Don't test ScanInts and ScanNumbers since some parts of 352 // the floats in the source look like (illegal) octal ints 353 // and ScanNumbers may return either Int or Float. 354 testScanSelectedMode(t, ScanChars, Char) 355 testScanSelectedMode(t, ScanStrings, String) 356 testScanSelectedMode(t, SkipComments, 0) 357 testScanSelectedMode(t, ScanComments, Comment) 358 } 359 360 func TestScanCustomIdent(t *testing.T) { 361 const src = "faab12345 a12b123 a12 3b" 362 s := new(Scanner).Init(strings.NewReader(src)) 363 // ident = ( 'a' | 'b' ) { digit } . 364 // digit = '0' .. '3' . 365 // with a maximum length of 4 366 s.IsIdentRune = func(ch rune, i int) bool { 367 return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3' 368 } 369 checkTok(t, s, 1, s.Scan(), 'f', "f") 370 checkTok(t, s, 1, s.Scan(), Ident, "a") 371 checkTok(t, s, 1, s.Scan(), Ident, "a") 372 checkTok(t, s, 1, s.Scan(), Ident, "b123") 373 checkTok(t, s, 1, s.Scan(), Int, "45") 374 checkTok(t, s, 1, s.Scan(), Ident, "a12") 375 checkTok(t, s, 1, s.Scan(), Ident, "b123") 376 checkTok(t, s, 1, s.Scan(), Ident, "a12") 377 checkTok(t, s, 1, s.Scan(), Int, "3") 378 checkTok(t, s, 1, s.Scan(), Ident, "b") 379 checkTok(t, s, 1, s.Scan(), EOF, "") 380 } 381 382 func TestScanNext(t *testing.T) { 383 const BOM = '\uFEFF' 384 BOMs := string(BOM) 385 s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof")) 386 checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored 387 checkTok(t, s, 1, s.Scan(), Ident, "a") 388 checkTok(t, s, 1, s.Scan(), '=', "=") 389 checkTok(t, s, 0, s.Next(), '=', "") 390 checkTok(t, s, 0, s.Next(), ' ', "") 391 checkTok(t, s, 0, s.Next(), 'b', "") 392 checkTok(t, s, 1, s.Scan(), Ident, "cd") 393 checkTok(t, s, 1, s.Scan(), '{', "{") 394 checkTok(t, s, 2, s.Scan(), Ident, "a") 395 checkTok(t, s, 2, s.Scan(), '+', "+") 396 checkTok(t, s, 0, s.Next(), '=', "") 397 checkTok(t, s, 2, s.Scan(), Ident, "c") 398 checkTok(t, s, 3, s.Scan(), '}', "}") 399 checkTok(t, s, 3, s.Scan(), BOM, BOMs) 400 checkTok(t, s, 3, s.Scan(), -1, "") 401 if s.ErrorCount != 0 { 402 t.Errorf("%d errors", s.ErrorCount) 403 } 404 } 405 406 func TestScanWhitespace(t *testing.T) { 407 var buf bytes.Buffer 408 var ws uint64 409 // start at 1, NUL character is not allowed 410 for ch := byte(1); ch < ' '; ch++ { 411 buf.WriteByte(ch) 412 ws |= 1 << ch 413 } 414 const orig = 'x' 415 buf.WriteByte(orig) 416 417 s := new(Scanner).Init(&buf) 418 s.Mode = 0 419 s.Whitespace = ws 420 tok := s.Scan() 421 if tok != orig { 422 t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig)) 423 } 424 } 425 426 func testError(t *testing.T, src, pos, msg string, tok rune) { 427 s := new(Scanner).Init(strings.NewReader(src)) 428 errorCalled := false 429 s.Error = func(s *Scanner, m string) { 430 if !errorCalled { 431 // only look at first error 432 if p := s.Pos().String(); p != pos { 433 t.Errorf("pos = %q, want %q for %q", p, pos, src) 434 } 435 if m != msg { 436 t.Errorf("msg = %q, want %q for %q", m, msg, src) 437 } 438 errorCalled = true 439 } 440 } 441 tk := s.Scan() 442 if tk != tok { 443 t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src) 444 } 445 if !errorCalled { 446 t.Errorf("error handler not called for %q", src) 447 } 448 if s.ErrorCount == 0 { 449 t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src) 450 } 451 } 452 453 func TestError(t *testing.T) { 454 testError(t, "\x00", "1:1", "illegal character NUL", 0) 455 testError(t, "\x80", "1:1", "illegal UTF-8 encoding", utf8.RuneError) 456 testError(t, "\xff", "1:1", "illegal UTF-8 encoding", utf8.RuneError) 457 458 testError(t, "a\x00", "1:2", "illegal character NUL", Ident) 459 testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", Ident) 460 testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", Ident) 461 462 testError(t, `"a`+"\x00", "1:3", "illegal character NUL", String) 463 testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", String) 464 testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", String) 465 466 testError(t, "`a"+"\x00", "1:3", "illegal character NUL", String) 467 testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", String) 468 testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", String) 469 470 testError(t, `'\"'`, "1:3", "illegal char escape", Char) 471 testError(t, `"\'"`, "1:3", "illegal char escape", String) 472 473 testError(t, `01238`, "1:6", "illegal octal number", Int) 474 testError(t, `01238123`, "1:9", "illegal octal number", Int) 475 testError(t, `0x`, "1:3", "illegal hexadecimal number", Int) 476 testError(t, `0xg`, "1:3", "illegal hexadecimal number", Int) 477 testError(t, `'aa'`, "1:4", "illegal char literal", Char) 478 479 testError(t, `'`, "1:2", "literal not terminated", Char) 480 testError(t, `'`+"\n", "1:2", "literal not terminated", Char) 481 testError(t, `"abc`, "1:5", "literal not terminated", String) 482 testError(t, `"abc`+"\n", "1:5", "literal not terminated", String) 483 testError(t, "`abc\n", "2:1", "literal not terminated", String) 484 testError(t, `/*/`, "1:4", "comment not terminated", EOF) 485 } 486 487 // An errReader returns (0, err) where err is not io.EOF. 488 type errReader struct{} 489 490 func (errReader) Read(b []byte) (int, error) { 491 return 0, io.ErrNoProgress // some error that is not io.EOF 492 } 493 494 func TestIOError(t *testing.T) { 495 s := new(Scanner).Init(errReader{}) 496 errorCalled := false 497 s.Error = func(s *Scanner, msg string) { 498 if !errorCalled { 499 if want := io.ErrNoProgress.Error(); msg != want { 500 t.Errorf("msg = %q, want %q", msg, want) 501 } 502 errorCalled = true 503 } 504 } 505 tok := s.Scan() 506 if tok != EOF { 507 t.Errorf("tok = %s, want EOF", TokenString(tok)) 508 } 509 if !errorCalled { 510 t.Errorf("error handler not called") 511 } 512 } 513 514 func checkPos(t *testing.T, got, want Position) { 515 if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column { 516 t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d", 517 got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column) 518 } 519 } 520 521 func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) { 522 if ch := s.Next(); ch != char { 523 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char)) 524 } 525 want := Position{Offset: offset, Line: line, Column: column} 526 checkPos(t, s.Pos(), want) 527 } 528 529 func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) { 530 want := Position{Offset: offset, Line: line, Column: column} 531 checkPos(t, s.Pos(), want) 532 if ch := s.Scan(); ch != char { 533 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char)) 534 if string(ch) != s.TokenText() { 535 t.Errorf("tok = %q, want %q", s.TokenText(), string(ch)) 536 } 537 } 538 checkPos(t, s.Position, want) 539 } 540 541 func TestPos(t *testing.T) { 542 // corner case: empty source 543 s := new(Scanner).Init(strings.NewReader("")) 544 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 545 s.Peek() // peek doesn't affect the position 546 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 547 548 // corner case: source with only a newline 549 s = new(Scanner).Init(strings.NewReader("\n")) 550 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 551 checkNextPos(t, s, 1, 2, 1, '\n') 552 // after EOF position doesn't change 553 for i := 10; i > 0; i-- { 554 checkScanPos(t, s, 1, 2, 1, EOF) 555 } 556 if s.ErrorCount != 0 { 557 t.Errorf("%d errors", s.ErrorCount) 558 } 559 560 // corner case: source with only a single character 561 s = new(Scanner).Init(strings.NewReader("本")) 562 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1}) 563 checkNextPos(t, s, 3, 1, 2, '本') 564 // after EOF position doesn't change 565 for i := 10; i > 0; i-- { 566 checkScanPos(t, s, 3, 1, 2, EOF) 567 } 568 if s.ErrorCount != 0 { 569 t.Errorf("%d errors", s.ErrorCount) 570 } 571 572 // positions after calling Next 573 s = new(Scanner).Init(strings.NewReader(" foo६४ \n\n本語\n")) 574 checkNextPos(t, s, 1, 1, 2, ' ') 575 s.Peek() // peek doesn't affect the position 576 checkNextPos(t, s, 2, 1, 3, ' ') 577 checkNextPos(t, s, 3, 1, 4, 'f') 578 checkNextPos(t, s, 4, 1, 5, 'o') 579 checkNextPos(t, s, 5, 1, 6, 'o') 580 checkNextPos(t, s, 8, 1, 7, '६') 581 checkNextPos(t, s, 11, 1, 8, '४') 582 checkNextPos(t, s, 12, 1, 9, ' ') 583 checkNextPos(t, s, 13, 1, 10, ' ') 584 checkNextPos(t, s, 14, 2, 1, '\n') 585 checkNextPos(t, s, 15, 3, 1, '\n') 586 checkNextPos(t, s, 18, 3, 2, '本') 587 checkNextPos(t, s, 21, 3, 3, '語') 588 checkNextPos(t, s, 22, 4, 1, '\n') 589 // after EOF position doesn't change 590 for i := 10; i > 0; i-- { 591 checkScanPos(t, s, 22, 4, 1, EOF) 592 } 593 if s.ErrorCount != 0 { 594 t.Errorf("%d errors", s.ErrorCount) 595 } 596 597 // positions after calling Scan 598 s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx")) 599 s.Mode = 0 600 s.Whitespace = 0 601 checkScanPos(t, s, 0, 1, 1, 'a') 602 s.Peek() // peek doesn't affect the position 603 checkScanPos(t, s, 1, 1, 2, 'b') 604 checkScanPos(t, s, 2, 1, 3, 'c') 605 checkScanPos(t, s, 3, 1, 4, '\n') 606 checkScanPos(t, s, 4, 2, 1, '本') 607 checkScanPos(t, s, 7, 2, 2, '語') 608 checkScanPos(t, s, 10, 2, 3, '\n') 609 checkScanPos(t, s, 11, 3, 1, '\n') 610 checkScanPos(t, s, 12, 4, 1, 'x') 611 // after EOF position doesn't change 612 for i := 10; i > 0; i-- { 613 checkScanPos(t, s, 13, 4, 2, EOF) 614 } 615 if s.ErrorCount != 0 { 616 t.Errorf("%d errors", s.ErrorCount) 617 } 618 }