github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/text/scanner/scanner_test.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package scanner
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  	"testing"
    13  	"unicode/utf8"
    14  )
    15  
    16  // A StringReader delivers its data one string segment at a time via Read.
    17  type StringReader struct {
    18  	data []string
    19  	step int
    20  }
    21  
    22  func (r *StringReader) Read(p []byte) (n int, err error) {
    23  	if r.step < len(r.data) {
    24  		s := r.data[r.step]
    25  		n = copy(p, s)
    26  		r.step++
    27  	} else {
    28  		err = io.EOF
    29  	}
    30  	return
    31  }
    32  
    33  func readRuneSegments(t *testing.T, segments []string) {
    34  	got := ""
    35  	want := strings.Join(segments, "")
    36  	s := new(Scanner).Init(&StringReader{data: segments})
    37  	for {
    38  		ch := s.Next()
    39  		if ch == EOF {
    40  			break
    41  		}
    42  		got += string(ch)
    43  	}
    44  	if got != want {
    45  		t.Errorf("segments=%v got=%s want=%s", segments, got, want)
    46  	}
    47  }
    48  
// segmentList holds the inputs for TestNext. Each entry is a list of
// segments that readRuneSegments hands to the scanner one Read at a time.
var segmentList = [][]string{
	// no segments at all, and a single empty segment
	{},
	{""},
	// the same three characters, written as literals and as escapes
	{"日", "本語"},
	{"\u65e5", "\u672c", "\u8a9e"},
	{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
	// raw bytes of the same characters, with the UTF-8 sequences
	// split across segment boundaries
	{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
	// plain ASCII, without and with an empty segment in the middle
	{"Hello", ", ", "World", "!"},
	{"Hello", ", ", "", "World", "!"},
}
    59  
    60  func TestNext(t *testing.T) {
    61  	for _, s := range segmentList {
    62  		readRuneSegments(t, s)
    63  	}
    64  }
    65  
// A token pairs an expected scan result with the source text that
// should produce it.
type token struct {
	tok  rune   // expected token value (a token class such as Ident, or a literal character)
	text string // source text of the token
}
    70  
// f100 is a long run of the letter 'f' (presumably 100 of them, going by
// the name). tokenList uses it to build long comments, identifiers,
// hexadecimal literals and strings.
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
    72  
// tokenList is the shared table of tokens used by the scan tests.
// makeSource writes each entry's text into a generated source, and the
// tests compare what the scanner returns against the entry's tok and
// text. The Comment entries such as "// identifiers" double as section
// headings within the generated source.
var tokenList = []token{
	{Comment, "// line comments"},
	{Comment, "//"},
	{Comment, "////"},
	{Comment, "// comment"},
	{Comment, "// /* comment */"},
	{Comment, "// // comment //"},
	{Comment, "//" + f100},

	{Comment, "// general comments"},
	{Comment, "/**/"},
	{Comment, "/***/"},
	{Comment, "/* comment */"},
	{Comment, "/* // comment */"},
	{Comment, "/* /* comment */"},
	{Comment, "/*\n comment\n*/"},
	{Comment, "/*" + f100 + "*/"},

	{Comment, "// identifiers"},
	{Ident, "a"},
	{Ident, "a0"},
	{Ident, "foobar"},
	{Ident, "abc123"},
	{Ident, "LGTM"},
	{Ident, "_"},
	{Ident, "_abc123"},
	{Ident, "abc123_"},
	{Ident, "_abc_123_"},
	{Ident, "_äöü"},
	{Ident, "_本"},
	{Ident, "äöü"},
	{Ident, "本"},
	{Ident, "a۰۱۸"},
	{Ident, "foo६४"},
	{Ident, "bar9876"},
	{Ident, f100},

	{Comment, "// decimal ints"},
	{Int, "0"},
	{Int, "1"},
	{Int, "9"},
	{Int, "42"},
	{Int, "1234567890"},

	{Comment, "// octal ints"},
	{Int, "00"},
	{Int, "01"},
	{Int, "07"},
	{Int, "042"},
	{Int, "01234567"},

	{Comment, "// hexadecimal ints"},
	{Int, "0x0"},
	{Int, "0x1"},
	{Int, "0xf"},
	{Int, "0x42"},
	{Int, "0x123456789abcDEF"},
	{Int, "0x" + f100},
	{Int, "0X0"},
	{Int, "0X1"},
	{Int, "0XF"},
	{Int, "0X42"},
	{Int, "0X123456789abcDEF"},
	{Int, "0X" + f100},

	{Comment, "// floats"},
	{Float, "0."},
	{Float, "1."},
	{Float, "42."},
	{Float, "01234567890."},
	{Float, ".0"},
	{Float, ".1"},
	{Float, ".42"},
	{Float, ".0123456789"},
	{Float, "0.0"},
	{Float, "1.0"},
	{Float, "42.0"},
	{Float, "01234567890.0"},
	{Float, "0e0"},
	{Float, "1e0"},
	{Float, "42e0"},
	{Float, "01234567890e0"},
	{Float, "0E0"},
	{Float, "1E0"},
	{Float, "42E0"},
	{Float, "01234567890E0"},
	{Float, "0e+10"},
	{Float, "1e-10"},
	{Float, "42e+10"},
	{Float, "01234567890e-10"},
	{Float, "0E+10"},
	{Float, "1E-10"},
	{Float, "42E+10"},
	{Float, "01234567890E-10"},

	{Comment, "// chars"},
	{Char, `' '`},
	{Char, `'a'`},
	{Char, `'本'`},
	{Char, `'\a'`},
	{Char, `'\b'`},
	{Char, `'\f'`},
	{Char, `'\n'`},
	{Char, `'\r'`},
	{Char, `'\t'`},
	{Char, `'\v'`},
	{Char, `'\''`},
	{Char, `'\000'`},
	{Char, `'\777'`},
	{Char, `'\x00'`},
	{Char, `'\xff'`},
	{Char, `'\u0000'`},
	{Char, `'\ufA16'`},
	{Char, `'\U00000000'`},
	{Char, `'\U0000ffAB'`},

	{Comment, "// strings"},
	{String, `" "`},
	{String, `"a"`},
	{String, `"本"`},
	{String, `"\a"`},
	{String, `"\b"`},
	{String, `"\f"`},
	{String, `"\n"`},
	{String, `"\r"`},
	{String, `"\t"`},
	{String, `"\v"`},
	{String, `"\""`},
	{String, `"\000"`},
	{String, `"\777"`},
	{String, `"\x00"`},
	{String, `"\xff"`},
	{String, `"\u0000"`},
	{String, `"\ufA16"`},
	{String, `"\U00000000"`},
	{String, `"\U0000ffAB"`},
	{String, `"` + f100 + `"`},

	{Comment, "// raw strings"},
	{String, "``"},
	{String, "`\\`"},
	{String, "`" + "\n\n/* foobar */\n\n" + "`"},
	{String, "`" + f100 + "`"},

	{Comment, "// individual characters"},
	// NUL character is not allowed
	{'\x01', "\x01"},
	{' ' - 1, string(' ' - 1)},
	{'+', "+"},
	{'/', "/"},
	{'.', "."},
	{'~', "~"},
	{'(', "("},
}
   227  
   228  func makeSource(pattern string) *bytes.Buffer {
   229  	var buf bytes.Buffer
   230  	for _, k := range tokenList {
   231  		fmt.Fprintf(&buf, pattern, k.text)
   232  	}
   233  	return &buf
   234  }
   235  
   236  func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
   237  	if got != want {
   238  		t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
   239  	}
   240  	if s.Line != line {
   241  		t.Errorf("line = %d, want %d for %q", s.Line, line, text)
   242  	}
   243  	stext := s.TokenText()
   244  	if stext != text {
   245  		t.Errorf("text = %q, want %q", stext, text)
   246  	} else {
   247  		// check idempotency of TokenText() call
   248  		stext = s.TokenText()
   249  		if stext != text {
   250  			t.Errorf("text = %q, want %q (idempotency check)", stext, text)
   251  		}
   252  	}
   253  }
   254  
   255  func countNewlines(s string) int {
   256  	n := 0
   257  	for _, ch := range s {
   258  		if ch == '\n' {
   259  			n++
   260  		}
   261  	}
   262  	return n
   263  }
   264  
   265  func testScan(t *testing.T, mode uint) {
   266  	s := new(Scanner).Init(makeSource(" \t%s\n"))
   267  	s.Mode = mode
   268  	tok := s.Scan()
   269  	line := 1
   270  	for _, k := range tokenList {
   271  		if mode&SkipComments == 0 || k.tok != Comment {
   272  			checkTok(t, s, line, tok, k.tok, k.text)
   273  			tok = s.Scan()
   274  		}
   275  		line += countNewlines(k.text) + 1 // each token is on a new line
   276  	}
   277  	checkTok(t, s, line, tok, EOF, "")
   278  }
   279  
   280  func TestScan(t *testing.T) {
   281  	testScan(t, GoTokens)
   282  	testScan(t, GoTokens&^SkipComments)
   283  }
   284  
   285  func TestPosition(t *testing.T) {
   286  	src := makeSource("\t\t\t\t%s\n")
   287  	s := new(Scanner).Init(src)
   288  	s.Mode = GoTokens &^ SkipComments
   289  	s.Scan()
   290  	pos := Position{"", 4, 1, 5}
   291  	for _, k := range tokenList {
   292  		if s.Offset != pos.Offset {
   293  			t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
   294  		}
   295  		if s.Line != pos.Line {
   296  			t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
   297  		}
   298  		if s.Column != pos.Column {
   299  			t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
   300  		}
   301  		pos.Offset += 4 + len(k.text) + 1     // 4 tabs + token bytes + newline
   302  		pos.Line += countNewlines(k.text) + 1 // each token is on a new line
   303  		s.Scan()
   304  	}
   305  	// make sure there were no token-internal errors reported by scanner
   306  	if s.ErrorCount != 0 {
   307  		t.Errorf("%d errors", s.ErrorCount)
   308  	}
   309  }
   310  
   311  func TestScanZeroMode(t *testing.T) {
   312  	src := makeSource("%s\n")
   313  	str := src.String()
   314  	s := new(Scanner).Init(src)
   315  	s.Mode = 0       // don't recognize any token classes
   316  	s.Whitespace = 0 // don't skip any whitespace
   317  	tok := s.Scan()
   318  	for i, ch := range str {
   319  		if tok != ch {
   320  			t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
   321  		}
   322  		tok = s.Scan()
   323  	}
   324  	if tok != EOF {
   325  		t.Fatalf("tok = %s, want EOF", TokenString(tok))
   326  	}
   327  	if s.ErrorCount != 0 {
   328  		t.Errorf("%d errors", s.ErrorCount)
   329  	}
   330  }
   331  
   332  func testScanSelectedMode(t *testing.T, mode uint, class rune) {
   333  	src := makeSource("%s\n")
   334  	s := new(Scanner).Init(src)
   335  	s.Mode = mode
   336  	tok := s.Scan()
   337  	for tok != EOF {
   338  		if tok < 0 && tok != class {
   339  			t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
   340  		}
   341  		tok = s.Scan()
   342  	}
   343  	if s.ErrorCount != 0 {
   344  		t.Errorf("%d errors", s.ErrorCount)
   345  	}
   346  }
   347  
   348  func TestScanSelectedMask(t *testing.T) {
   349  	testScanSelectedMode(t, 0, 0)
   350  	testScanSelectedMode(t, ScanIdents, Ident)
   351  	// Don't test ScanInts and ScanNumbers since some parts of
   352  	// the floats in the source look like (illegal) octal ints
   353  	// and ScanNumbers may return either Int or Float.
   354  	testScanSelectedMode(t, ScanChars, Char)
   355  	testScanSelectedMode(t, ScanStrings, String)
   356  	testScanSelectedMode(t, SkipComments, 0)
   357  	testScanSelectedMode(t, ScanComments, Comment)
   358  }
   359  
   360  func TestScanNext(t *testing.T) {
   361  	const BOM = '\uFEFF'
   362  	BOMs := string(BOM)
   363  	s := new(Scanner).Init(bytes.NewBufferString(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof"))
   364  	checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored
   365  	checkTok(t, s, 1, s.Scan(), Ident, "a")
   366  	checkTok(t, s, 1, s.Scan(), '=', "=")
   367  	checkTok(t, s, 0, s.Next(), '=', "")
   368  	checkTok(t, s, 0, s.Next(), ' ', "")
   369  	checkTok(t, s, 0, s.Next(), 'b', "")
   370  	checkTok(t, s, 1, s.Scan(), Ident, "cd")
   371  	checkTok(t, s, 1, s.Scan(), '{', "{")
   372  	checkTok(t, s, 2, s.Scan(), Ident, "a")
   373  	checkTok(t, s, 2, s.Scan(), '+', "+")
   374  	checkTok(t, s, 0, s.Next(), '=', "")
   375  	checkTok(t, s, 2, s.Scan(), Ident, "c")
   376  	checkTok(t, s, 3, s.Scan(), '}', "}")
   377  	checkTok(t, s, 3, s.Scan(), BOM, BOMs)
   378  	checkTok(t, s, 3, s.Scan(), -1, "")
   379  	if s.ErrorCount != 0 {
   380  		t.Errorf("%d errors", s.ErrorCount)
   381  	}
   382  }
   383  
   384  func TestScanWhitespace(t *testing.T) {
   385  	var buf bytes.Buffer
   386  	var ws uint64
   387  	// start at 1, NUL character is not allowed
   388  	for ch := byte(1); ch < ' '; ch++ {
   389  		buf.WriteByte(ch)
   390  		ws |= 1 << ch
   391  	}
   392  	const orig = 'x'
   393  	buf.WriteByte(orig)
   394  
   395  	s := new(Scanner).Init(&buf)
   396  	s.Mode = 0
   397  	s.Whitespace = ws
   398  	tok := s.Scan()
   399  	if tok != orig {
   400  		t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
   401  	}
   402  }
   403  
   404  func testError(t *testing.T, src, pos, msg string, tok rune) {
   405  	s := new(Scanner).Init(bytes.NewBufferString(src))
   406  	errorCalled := false
   407  	s.Error = func(s *Scanner, m string) {
   408  		if !errorCalled {
   409  			// only look at first error
   410  			if p := s.Pos().String(); p != pos {
   411  				t.Errorf("pos = %q, want %q for %q", p, pos, src)
   412  			}
   413  			if m != msg {
   414  				t.Errorf("msg = %q, want %q for %q", m, msg, src)
   415  			}
   416  			errorCalled = true
   417  		}
   418  	}
   419  	tk := s.Scan()
   420  	if tk != tok {
   421  		t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
   422  	}
   423  	if !errorCalled {
   424  		t.Errorf("error handler not called for %q", src)
   425  	}
   426  	if s.ErrorCount == 0 {
   427  		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
   428  	}
   429  }
   430  
   431  func TestError(t *testing.T) {
   432  	testError(t, "\x00", "1:1", "illegal character NUL", 0)
   433  	testError(t, "\x80", "1:1", "illegal UTF-8 encoding", utf8.RuneError)
   434  	testError(t, "\xff", "1:1", "illegal UTF-8 encoding", utf8.RuneError)
   435  
   436  	testError(t, "a\x00", "1:2", "illegal character NUL", Ident)
   437  	testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", Ident)
   438  	testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", Ident)
   439  
   440  	testError(t, `"a`+"\x00", "1:3", "illegal character NUL", String)
   441  	testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", String)
   442  	testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", String)
   443  
   444  	testError(t, "`a"+"\x00", "1:3", "illegal character NUL", String)
   445  	testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", String)
   446  	testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", String)
   447  
   448  	testError(t, `'\"'`, "1:3", "illegal char escape", Char)
   449  	testError(t, `"\'"`, "1:3", "illegal char escape", String)
   450  
   451  	testError(t, `01238`, "1:6", "illegal octal number", Int)
   452  	testError(t, `01238123`, "1:9", "illegal octal number", Int)
   453  	testError(t, `0x`, "1:3", "illegal hexadecimal number", Int)
   454  	testError(t, `0xg`, "1:3", "illegal hexadecimal number", Int)
   455  	testError(t, `'aa'`, "1:4", "illegal char literal", Char)
   456  
   457  	testError(t, `'`, "1:2", "literal not terminated", Char)
   458  	testError(t, `'`+"\n", "1:2", "literal not terminated", Char)
   459  	testError(t, `"abc`, "1:5", "literal not terminated", String)
   460  	testError(t, `"abc`+"\n", "1:5", "literal not terminated", String)
   461  	testError(t, "`abc\n", "2:1", "literal not terminated", String)
   462  	testError(t, `/*/`, "1:4", "comment not terminated", EOF)
   463  }
   464  
   465  func checkPos(t *testing.T, got, want Position) {
   466  	if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column {
   467  		t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
   468  			got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column)
   469  	}
   470  }
   471  
   472  func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
   473  	if ch := s.Next(); ch != char {
   474  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
   475  	}
   476  	want := Position{Offset: offset, Line: line, Column: column}
   477  	checkPos(t, s.Pos(), want)
   478  }
   479  
   480  func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
   481  	want := Position{Offset: offset, Line: line, Column: column}
   482  	checkPos(t, s.Pos(), want)
   483  	if ch := s.Scan(); ch != char {
   484  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
   485  		if string(ch) != s.TokenText() {
   486  			t.Errorf("tok = %q, want %q", s.TokenText(), string(ch))
   487  		}
   488  	}
   489  	checkPos(t, s.Position, want)
   490  }
   491  
   492  func TestPos(t *testing.T) {
   493  	// corner case: empty source
   494  	s := new(Scanner).Init(bytes.NewBufferString(""))
   495  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   496  	s.Peek() // peek doesn't affect the position
   497  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   498  
   499  	// corner case: source with only a newline
   500  	s = new(Scanner).Init(bytes.NewBufferString("\n"))
   501  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   502  	checkNextPos(t, s, 1, 2, 1, '\n')
   503  	// after EOF position doesn't change
   504  	for i := 10; i > 0; i-- {
   505  		checkScanPos(t, s, 1, 2, 1, EOF)
   506  	}
   507  	if s.ErrorCount != 0 {
   508  		t.Errorf("%d errors", s.ErrorCount)
   509  	}
   510  
   511  	// corner case: source with only a single character
   512  	s = new(Scanner).Init(bytes.NewBufferString("本"))
   513  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   514  	checkNextPos(t, s, 3, 1, 2, '本')
   515  	// after EOF position doesn't change
   516  	for i := 10; i > 0; i-- {
   517  		checkScanPos(t, s, 3, 1, 2, EOF)
   518  	}
   519  	if s.ErrorCount != 0 {
   520  		t.Errorf("%d errors", s.ErrorCount)
   521  	}
   522  
   523  	// positions after calling Next
   524  	s = new(Scanner).Init(bytes.NewBufferString("  foo६४  \n\n本語\n"))
   525  	checkNextPos(t, s, 1, 1, 2, ' ')
   526  	s.Peek() // peek doesn't affect the position
   527  	checkNextPos(t, s, 2, 1, 3, ' ')
   528  	checkNextPos(t, s, 3, 1, 4, 'f')
   529  	checkNextPos(t, s, 4, 1, 5, 'o')
   530  	checkNextPos(t, s, 5, 1, 6, 'o')
   531  	checkNextPos(t, s, 8, 1, 7, '६')
   532  	checkNextPos(t, s, 11, 1, 8, '४')
   533  	checkNextPos(t, s, 12, 1, 9, ' ')
   534  	checkNextPos(t, s, 13, 1, 10, ' ')
   535  	checkNextPos(t, s, 14, 2, 1, '\n')
   536  	checkNextPos(t, s, 15, 3, 1, '\n')
   537  	checkNextPos(t, s, 18, 3, 2, '本')
   538  	checkNextPos(t, s, 21, 3, 3, '語')
   539  	checkNextPos(t, s, 22, 4, 1, '\n')
   540  	// after EOF position doesn't change
   541  	for i := 10; i > 0; i-- {
   542  		checkScanPos(t, s, 22, 4, 1, EOF)
   543  	}
   544  	if s.ErrorCount != 0 {
   545  		t.Errorf("%d errors", s.ErrorCount)
   546  	}
   547  
   548  	// positions after calling Scan
   549  	s = new(Scanner).Init(bytes.NewBufferString("abc\n本語\n\nx"))
   550  	s.Mode = 0
   551  	s.Whitespace = 0
   552  	checkScanPos(t, s, 0, 1, 1, 'a')
   553  	s.Peek() // peek doesn't affect the position
   554  	checkScanPos(t, s, 1, 1, 2, 'b')
   555  	checkScanPos(t, s, 2, 1, 3, 'c')
   556  	checkScanPos(t, s, 3, 1, 4, '\n')
   557  	checkScanPos(t, s, 4, 2, 1, '本')
   558  	checkScanPos(t, s, 7, 2, 2, '語')
   559  	checkScanPos(t, s, 10, 2, 3, '\n')
   560  	checkScanPos(t, s, 11, 3, 1, '\n')
   561  	checkScanPos(t, s, 12, 4, 1, 'x')
   562  	// after EOF position doesn't change
   563  	for i := 10; i > 0; i-- {
   564  		checkScanPos(t, s, 13, 4, 2, EOF)
   565  	}
   566  	if s.ErrorCount != 0 {
   567  		t.Errorf("%d errors", s.ErrorCount)
   568  	}
   569  }