github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/cmd/compile/internal/syntax/scanner_test.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 import ( 8 "fmt" 9 "os" 10 "testing" 11 ) 12 13 func TestScanner(t *testing.T) { 14 if testing.Short() { 15 t.Skip("skipping test in short mode") 16 } 17 18 src, err := os.Open("parser.go") 19 if err != nil { 20 t.Fatal(err) 21 } 22 defer src.Close() 23 24 var s scanner 25 s.init(src, nil, nil) 26 for { 27 s.next() 28 if s.tok == _EOF { 29 break 30 } 31 switch s.tok { 32 case _Name: 33 fmt.Println(s.line, s.tok, "=>", s.lit) 34 case _Operator: 35 fmt.Println(s.line, s.tok, "=>", s.op, s.prec) 36 default: 37 fmt.Println(s.line, s.tok) 38 } 39 } 40 } 41 42 func TestTokens(t *testing.T) { 43 // make source 44 var buf []byte 45 for i, s := range sampleTokens { 46 buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation 47 buf = append(buf, s.src...) // token 48 buf = append(buf, " "[:i&7]...) // trailing spaces 49 buf = append(buf, "/* foo */ // bar\n"...) // comments 50 } 51 52 // scan source 53 var got scanner 54 got.init(&bytesReader{buf}, nil, nil) 55 got.next() 56 for i, want := range sampleTokens { 57 nlsemi := false 58 59 if got.line != uint(i+linebase) { 60 t.Errorf("got line %d; want %d", got.line, i+linebase) 61 } 62 63 if got.tok != want.tok { 64 t.Errorf("got tok = %s; want %s", got.tok, want.tok) 65 continue 66 } 67 68 switch want.tok { 69 case _Semi: 70 if got.lit != "semicolon" { 71 t.Errorf("got %s; want semicolon", got.lit) 72 } 73 74 case _Name, _Literal: 75 if got.lit != want.src { 76 t.Errorf("got lit = %q; want %q", got.lit, want.src) 77 continue 78 } 79 nlsemi = true 80 81 case _Operator, _AssignOp, _IncOp: 82 if got.op != want.op { 83 t.Errorf("got op = %s; want %s", got.op, want.op) 84 continue 85 } 86 if got.prec != want.prec { 87 t.Errorf("got prec = %d; want %d", got.prec, want.prec) 88 continue 89 } 90 nlsemi = want.tok == _IncOp 91 92 case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return: 93 nlsemi = true 94 } 95 96 if nlsemi { 97 got.next() 98 if got.tok != _Semi { 99 t.Errorf("got tok = %s; want ;", got.tok) 100 continue 101 } 102 if got.lit != "newline" { 103 t.Errorf("got %s; want newline", got.lit) 104 } 105 } 106 107 got.next() 108 } 109 110 if got.tok != _EOF { 111 t.Errorf("got %q; want _EOF", got.tok) 112 } 113 } 114 115 var sampleTokens = [...]struct { 116 tok token 117 src string 118 op Operator 119 prec int 120 }{ 121 // name samples 122 {_Name, "x", 0, 0}, 123 {_Name, "X123", 0, 0}, 124 {_Name, "foo", 0, 0}, 125 {_Name, "Foo123", 0, 0}, 126 {_Name, "foo_bar", 0, 0}, 127 {_Name, "_", 0, 0}, 128 {_Name, "_foobar", 0, 0}, 129 {_Name, "a۰۱۸", 0, 0}, 130 {_Name, "foo६४", 0, 0}, 131 {_Name, "bar9876", 0, 0}, 132 {_Name, "ŝ", 0, 0}, 133 {_Name, "ŝfoo", 0, 0}, 134 135 // literal samples 136 {_Literal, "0", 0, 0}, 137 {_Literal, "1", 0, 0}, 138 {_Literal, "12345", 0, 0}, 139 {_Literal, "123456789012345678890123456789012345678890", 0, 0}, 140 {_Literal, "01234567", 0, 0}, 141 {_Literal, "0x0", 0, 0}, 142 {_Literal, "0xcafebabe", 0, 0}, 143 {_Literal, "0.", 0, 0}, 144 {_Literal, "0.e0", 0, 0}, 145 {_Literal, "0.e-1", 0, 0}, 146 {_Literal, "0.e+123", 0, 0}, 147 {_Literal, ".0", 0, 0}, 148 {_Literal, ".0E00", 0, 0}, 149 {_Literal, ".0E-0123", 0, 0}, 150 {_Literal, ".0E+12345678901234567890", 0, 0}, 151 {_Literal, ".45e1", 0, 0}, 152 {_Literal, "3.14159265", 0, 0}, 153 {_Literal, "1e0", 0, 0}, 154 {_Literal, "1e+100", 0, 0}, 155 {_Literal, "1e-100", 0, 0}, 156 {_Literal, "2.71828e-1000", 0, 0}, 157 {_Literal, "0i", 0, 0}, 158 {_Literal, "1i", 0, 0}, 159 {_Literal, "012345678901234567889i", 0, 0}, 160 {_Literal, "123456789012345678890i", 0, 0}, 161 {_Literal, "0.i", 0, 0}, 162 {_Literal, ".0i", 0, 0}, 163 {_Literal, "3.14159265i", 0, 0}, 164 {_Literal, "1e0i", 0, 0}, 165 {_Literal, "1e+100i", 0, 0}, 166 {_Literal, "1e-100i", 0, 0}, 167 {_Literal, "2.71828e-1000i", 0, 0}, 168 {_Literal, "'a'", 0, 0}, 169 {_Literal, "'\\000'", 0, 0}, 170 {_Literal, "'\\xFF'", 0, 0}, 171 {_Literal, "'\\uff16'", 0, 0}, 172 {_Literal, "'\\U0000ff16'", 0, 0}, 173 {_Literal, "`foobar`", 0, 0}, 174 {_Literal, "`foo\tbar`", 0, 0}, 175 {_Literal, "`\r`", 0, 0}, 176 177 // operators 178 {_Operator, "||", OrOr, precOrOr}, 179 180 {_Operator, "&&", AndAnd, precAndAnd}, 181 182 {_Operator, "==", Eql, precCmp}, 183 {_Operator, "!=", Neq, precCmp}, 184 {_Operator, "<", Lss, precCmp}, 185 {_Operator, "<=", Leq, precCmp}, 186 {_Operator, ">", Gtr, precCmp}, 187 {_Operator, ">=", Geq, precCmp}, 188 189 {_Operator, "+", Add, precAdd}, 190 {_Operator, "-", Sub, precAdd}, 191 {_Operator, "|", Or, precAdd}, 192 {_Operator, "^", Xor, precAdd}, 193 194 {_Star, "*", Mul, precMul}, 195 {_Operator, "/", Div, precMul}, 196 {_Operator, "%", Rem, precMul}, 197 {_Operator, "&", And, precMul}, 198 {_Operator, "&^", AndNot, precMul}, 199 {_Operator, "<<", Shl, precMul}, 200 {_Operator, ">>", Shr, precMul}, 201 202 // assignment operations 203 {_AssignOp, "+=", Add, precAdd}, 204 {_AssignOp, "-=", Sub, precAdd}, 205 {_AssignOp, "|=", Or, precAdd}, 206 {_AssignOp, "^=", Xor, precAdd}, 207 208 {_AssignOp, "*=", Mul, precMul}, 209 {_AssignOp, "/=", Div, precMul}, 210 {_AssignOp, "%=", Rem, precMul}, 211 {_AssignOp, "&=", And, precMul}, 212 {_AssignOp, "&^=", AndNot, precMul}, 213 {_AssignOp, "<<=", Shl, precMul}, 214 {_AssignOp, ">>=", Shr, precMul}, 215 216 // other operations 217 {_IncOp, "++", Add, precAdd}, 218 {_IncOp, "--", Sub, precAdd}, 219 {_Assign, "=", 0, 0}, 220 {_Define, ":=", 0, 0}, 221 {_Arrow, "<-", 0, 0}, 222 223 // delimiters 224 {_Lparen, "(", 0, 0}, 225 {_Lbrack, "[", 0, 0}, 226 {_Lbrace, "{", 0, 0}, 227 {_Rparen, ")", 0, 0}, 228 {_Rbrack, "]", 0, 0}, 229 {_Rbrace, "}", 0, 0}, 230 {_Comma, ",", 0, 0}, 231 {_Semi, ";", 0, 0}, 232 {_Colon, ":", 0, 0}, 233 {_Dot, ".", 0, 0}, 234 {_DotDotDot, "...", 0, 0}, 235 236 // keywords 237 {_Break, "break", 0, 0}, 238 {_Case, "case", 0, 0}, 239 {_Chan, "chan", 0, 0}, 240 {_Const, "const", 0, 0}, 241 {_Continue, "continue", 0, 0}, 242 {_Default, "default", 0, 0}, 243 {_Defer, "defer", 0, 0}, 244 {_Else, "else", 0, 0}, 245 {_Fallthrough, "fallthrough", 0, 0}, 246 {_For, "for", 0, 0}, 247 {_Func, "func", 0, 0}, 248 {_Go, "go", 0, 0}, 249 {_Goto, "goto", 0, 0}, 250 {_If, "if", 0, 0}, 251 {_Import, "import", 0, 0}, 252 {_Interface, "interface", 0, 0}, 253 {_Map, "map", 0, 0}, 254 {_Package, "package", 0, 0}, 255 {_Range, "range", 0, 0}, 256 {_Return, "return", 0, 0}, 257 {_Select, "select", 0, 0}, 258 {_Struct, "struct", 0, 0}, 259 {_Switch, "switch", 0, 0}, 260 {_Type, "type", 0, 0}, 261 {_Var, "var", 0, 0}, 262 } 263 264 func TestScanErrors(t *testing.T) { 265 for _, test := range []struct { 266 src, msg string 267 line, col uint // 0-based 268 }{ 269 // Note: Positions for lexical errors are the earliest position 270 // where the error is apparent, not the beginning of the respective 271 // token. 272 273 // rune-level errors 274 {"fo\x00o", "invalid NUL character", 0, 2}, 275 {"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0}, 276 {"foo\n\n\xff ", "invalid UTF-8 encoding", 2, 0}, 277 278 // token-level errors 279 {"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0}, 280 {"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */}, 281 {"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0}, 282 {"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */}, 283 284 {"x + ~y", "bitwise complement operator is ^", 0, 4}, 285 {"foo$bar = 0", "invalid character U+0024 '$'", 0, 3}, 286 {"const x = 0xyz", "malformed hex constant", 0, 12}, 287 {"0123456789", "malformed octal constant", 0, 10}, 288 {"0123456789. /* foobar", "comment not terminated", 0, 12}, // valid float constant 289 {"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant 290 {"var a, b = 08, 07\n", "malformed octal constant", 0, 13}, 291 {"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10}, 292 293 {`''`, "empty character literal or unescaped ' in character literal", 0, 1}, 294 {"'\n", "newline in character literal", 0, 1}, 295 {`'\`, "invalid character literal (missing closing ')", 0, 0}, 296 {`'\'`, "invalid character literal (missing closing ')", 0, 0}, 297 {`'\x`, "invalid character literal (missing closing ')", 0, 0}, 298 {`'\x'`, "non-hex character in escape sequence: '", 0, 3}, 299 {`'\y'`, "unknown escape sequence", 0, 2}, 300 {`'\x0'`, "non-hex character in escape sequence: '", 0, 4}, 301 {`'\00'`, "non-octal character in escape sequence: '", 0, 4}, 302 {`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape 303 {`'\378`, "non-octal character in escape sequence: 8", 0, 4}, 304 {`'\400'`, "octal escape value > 255: 256", 0, 5}, 305 {`'xx`, "invalid character literal (missing closing ')", 0, 0}, 306 {`'xx'`, "invalid character literal (more than one character)", 0, 0}, 307 308 {"\"\n", "newline in string", 0, 1}, 309 {`"`, "string not terminated", 0, 0}, 310 {`"foo`, "string not terminated", 0, 0}, 311 {"`", "string not terminated", 0, 0}, 312 {"`foo", "string not terminated", 0, 0}, 313 {"/*/", "comment not terminated", 0, 0}, 314 {"/*\n\nfoo", "comment not terminated", 0, 0}, 315 {"/*\n\nfoo", "comment not terminated", 0, 0}, 316 {`"\`, "string not terminated", 0, 0}, 317 {`"\"`, "string not terminated", 0, 0}, 318 {`"\x`, "string not terminated", 0, 0}, 319 {`"\x"`, "non-hex character in escape sequence: \"", 0, 3}, 320 {`"\y"`, "unknown escape sequence", 0, 2}, 321 {`"\x0"`, "non-hex character in escape sequence: \"", 0, 4}, 322 {`"\00"`, "non-octal character in escape sequence: \"", 0, 4}, 323 {`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape 324 {`"\378"`, "non-octal character in escape sequence: 8", 0, 4}, 325 {`"\400"`, "octal escape value > 255: 256", 0, 5}, 326 327 {`s := "foo\z"`, "unknown escape sequence", 0, 10}, 328 {`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10}, 329 {`"\x`, "string not terminated", 0, 0}, 330 {`"\x"`, "non-hex character in escape sequence: \"", 0, 3}, 331 {`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18}, 332 {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18}, 333 334 // former problem cases 335 {"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0}, 336 } { 337 var s scanner 338 nerrors := 0 339 s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) { 340 nerrors++ 341 // only check the first error 342 if nerrors == 1 { 343 if msg != test.msg { 344 t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg) 345 } 346 if line != test.line+linebase { 347 t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase) 348 } 349 if col != test.col+colbase { 350 t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase) 351 } 352 } else if nerrors > 1 { 353 // TODO(gri) make this use position info 354 t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line) 355 } 356 }, nil) 357 358 for { 359 s.next() 360 if s.tok == _EOF { 361 break 362 } 363 } 364 365 if nerrors == 0 { 366 t.Errorf("%q: got no error; want %q", test.src, test.msg) 367 } 368 } 369 }