github.com/riscv/riscv-go@v0.0.0-20200123204226-124ebd6fcc8e/src/cmd/compile/internal/syntax/scanner_test.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 import ( 8 "fmt" 9 "os" 10 "testing" 11 ) 12 13 func TestScanner(t *testing.T) { 14 if testing.Short() { 15 t.Skip("skipping test in short mode") 16 } 17 18 src, err := os.Open("parser.go") 19 if err != nil { 20 t.Fatal(err) 21 } 22 defer src.Close() 23 24 var s scanner 25 s.init(src, nil, nil) 26 for { 27 s.next() 28 if s.tok == _EOF { 29 break 30 } 31 switch s.tok { 32 case _Name: 33 fmt.Println(s.line, s.tok, "=>", s.lit) 34 case _Operator: 35 fmt.Println(s.line, s.tok, "=>", s.op, s.prec) 36 default: 37 fmt.Println(s.line, s.tok) 38 } 39 } 40 } 41 42 func TestTokens(t *testing.T) { 43 // make source 44 var buf []byte 45 for i, s := range sampleTokens { 46 buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation 47 buf = append(buf, s.src...) // token 48 buf = append(buf, " "[:i&7]...) // trailing spaces 49 buf = append(buf, "/* foo */ // bar\n"...) // comments 50 } 51 52 // scan source 53 var got scanner 54 got.init(&bytesReader{buf}, nil, nil) 55 got.next() 56 for i, want := range sampleTokens { 57 nlsemi := false 58 59 if got.line != uint(i+1) { 60 t.Errorf("got line %d; want %d", got.line, i+1) 61 } 62 63 if got.tok != want.tok { 64 t.Errorf("got tok = %s; want %s", got.tok, want.tok) 65 continue 66 } 67 68 switch want.tok { 69 case _Name, _Literal: 70 if got.lit != want.src { 71 t.Errorf("got lit = %q; want %q", got.lit, want.src) 72 continue 73 } 74 nlsemi = true 75 76 case _Operator, _AssignOp, _IncOp: 77 if got.op != want.op { 78 t.Errorf("got op = %s; want %s", got.op, want.op) 79 continue 80 } 81 if got.prec != want.prec { 82 t.Errorf("got prec = %d; want %d", got.prec, want.prec) 83 continue 84 } 85 nlsemi = want.tok == _IncOp 86 87 case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return: 88 nlsemi = true 89 } 90 91 if nlsemi { 92 got.next() 93 if got.tok != _Semi { 94 t.Errorf("got tok = %s; want ;", got.tok) 95 continue 96 } 97 } 98 99 got.next() 100 } 101 102 if got.tok != _EOF { 103 t.Errorf("got %q; want _EOF", got.tok) 104 } 105 } 106 107 var sampleTokens = [...]struct { 108 tok token 109 src string 110 op Operator 111 prec int 112 }{ 113 // name samples 114 {_Name, "x", 0, 0}, 115 {_Name, "X123", 0, 0}, 116 {_Name, "foo", 0, 0}, 117 {_Name, "Foo123", 0, 0}, 118 {_Name, "foo_bar", 0, 0}, 119 {_Name, "_", 0, 0}, 120 {_Name, "_foobar", 0, 0}, 121 {_Name, "a۰۱۸", 0, 0}, 122 {_Name, "foo६४", 0, 0}, 123 {_Name, "bar9876", 0, 0}, 124 {_Name, "ŝ", 0, 0}, 125 {_Name, "ŝfoo", 0, 0}, 126 127 // literal samples 128 {_Literal, "0", 0, 0}, 129 {_Literal, "1", 0, 0}, 130 {_Literal, "12345", 0, 0}, 131 {_Literal, "123456789012345678890123456789012345678890", 0, 0}, 132 {_Literal, "01234567", 0, 0}, 133 {_Literal, "0x0", 0, 0}, 134 {_Literal, "0xcafebabe", 0, 0}, 135 {_Literal, "0.", 0, 0}, 136 {_Literal, "0.e0", 0, 0}, 137 {_Literal, "0.e-1", 0, 0}, 138 {_Literal, "0.e+123", 0, 0}, 139 {_Literal, ".0", 0, 0}, 140 {_Literal, ".0E00", 0, 0}, 141 {_Literal, ".0E-0123", 0, 0}, 142 {_Literal, ".0E+12345678901234567890", 0, 0}, 143 {_Literal, ".45e1", 0, 0}, 144 {_Literal, "3.14159265", 0, 0}, 145 {_Literal, "1e0", 0, 0}, 146 {_Literal, "1e+100", 0, 0}, 147 {_Literal, "1e-100", 0, 0}, 148 {_Literal, "2.71828e-1000", 0, 0}, 149 {_Literal, "0i", 0, 0}, 150 {_Literal, "1i", 0, 0}, 151 {_Literal, "012345678901234567889i", 0, 0}, 152 {_Literal, "123456789012345678890i", 0, 0}, 153 {_Literal, "0.i", 0, 0}, 154 {_Literal, ".0i", 0, 0}, 155 {_Literal, "3.14159265i", 0, 0}, 156 {_Literal, "1e0i", 0, 0}, 157 {_Literal, "1e+100i", 0, 0}, 158 {_Literal, "1e-100i", 0, 0}, 159 {_Literal, "2.71828e-1000i", 0, 0}, 160 {_Literal, "'a'", 0, 0}, 161 {_Literal, "'\\000'", 0, 0}, 162 {_Literal, "'\\xFF'", 0, 0}, 163 {_Literal, "'\\uff16'", 0, 0}, 164 {_Literal, "'\\U0000ff16'", 0, 0}, 165 {_Literal, "`foobar`", 0, 0}, 166 {_Literal, "`foo\tbar`", 0, 0}, 167 {_Literal, "`\r`", 0, 0}, 168 169 // operators 170 {_Operator, "||", OrOr, precOrOr}, 171 172 {_Operator, "&&", AndAnd, precAndAnd}, 173 174 {_Operator, "==", Eql, precCmp}, 175 {_Operator, "!=", Neq, precCmp}, 176 {_Operator, "<", Lss, precCmp}, 177 {_Operator, "<=", Leq, precCmp}, 178 {_Operator, ">", Gtr, precCmp}, 179 {_Operator, ">=", Geq, precCmp}, 180 181 {_Operator, "+", Add, precAdd}, 182 {_Operator, "-", Sub, precAdd}, 183 {_Operator, "|", Or, precAdd}, 184 {_Operator, "^", Xor, precAdd}, 185 186 {_Star, "*", Mul, precMul}, 187 {_Operator, "/", Div, precMul}, 188 {_Operator, "%", Rem, precMul}, 189 {_Operator, "&", And, precMul}, 190 {_Operator, "&^", AndNot, precMul}, 191 {_Operator, "<<", Shl, precMul}, 192 {_Operator, ">>", Shr, precMul}, 193 194 // assignment operations 195 {_AssignOp, "+=", Add, precAdd}, 196 {_AssignOp, "-=", Sub, precAdd}, 197 {_AssignOp, "|=", Or, precAdd}, 198 {_AssignOp, "^=", Xor, precAdd}, 199 200 {_AssignOp, "*=", Mul, precMul}, 201 {_AssignOp, "/=", Div, precMul}, 202 {_AssignOp, "%=", Rem, precMul}, 203 {_AssignOp, "&=", And, precMul}, 204 {_AssignOp, "&^=", AndNot, precMul}, 205 {_AssignOp, "<<=", Shl, precMul}, 206 {_AssignOp, ">>=", Shr, precMul}, 207 208 // other operations 209 {_IncOp, "++", Add, precAdd}, 210 {_IncOp, "--", Sub, precAdd}, 211 {_Assign, "=", 0, 0}, 212 {_Define, ":=", 0, 0}, 213 {_Arrow, "<-", 0, 0}, 214 215 // delimiters 216 {_Lparen, "(", 0, 0}, 217 {_Lbrack, "[", 0, 0}, 218 {_Lbrace, "{", 0, 0}, 219 {_Rparen, ")", 0, 0}, 220 {_Rbrack, "]", 0, 0}, 221 {_Rbrace, "}", 0, 0}, 222 {_Comma, ",", 0, 0}, 223 {_Semi, ";", 0, 0}, 224 {_Colon, ":", 0, 0}, 225 {_Dot, ".", 0, 0}, 226 {_DotDotDot, "...", 0, 0}, 227 228 // keywords 229 {_Break, "break", 0, 0}, 230 {_Case, "case", 0, 0}, 231 {_Chan, "chan", 0, 0}, 232 {_Const, "const", 0, 0}, 233 {_Continue, "continue", 0, 0}, 234 {_Default, "default", 0, 0}, 235 {_Defer, "defer", 0, 0}, 236 {_Else, "else", 0, 0}, 237 {_Fallthrough, "fallthrough", 0, 0}, 238 {_For, "for", 0, 0}, 239 {_Func, "func", 0, 0}, 240 {_Go, "go", 0, 0}, 241 {_Goto, "goto", 0, 0}, 242 {_If, "if", 0, 0}, 243 {_Import, "import", 0, 0}, 244 {_Interface, "interface", 0, 0}, 245 {_Map, "map", 0, 0}, 246 {_Package, "package", 0, 0}, 247 {_Range, "range", 0, 0}, 248 {_Return, "return", 0, 0}, 249 {_Select, "select", 0, 0}, 250 {_Struct, "struct", 0, 0}, 251 {_Switch, "switch", 0, 0}, 252 {_Type, "type", 0, 0}, 253 {_Var, "var", 0, 0}, 254 } 255 256 func TestScanErrors(t *testing.T) { 257 for _, test := range []struct { 258 src, msg string 259 line, col uint 260 }{ 261 // Note: Positions for lexical errors are the earliest position 262 // where the error is apparent, not the beginning of the respective 263 // token. 264 265 // rune-level errors 266 {"fo\x00o", "invalid NUL character", 1, 2}, 267 {"foo\n\ufeff bar", "invalid BOM in the middle of the file", 2, 0}, 268 {"foo\n\n\xff ", "invalid UTF-8 encoding", 3, 0}, 269 270 // token-level errors 271 {"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 1, 0}, 272 {"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 1, 13 /* byte offset */}, 273 {"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 1, 0}, 274 {"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 1, 8 /* byte offset */}, 275 276 {"x + ~y", "bitwise complement operator is ^", 1, 4}, 277 {"foo$bar = 0", "invalid character U+0024 '$'", 1, 3}, 278 {"const x = 0xyz", "malformed hex constant", 1, 12}, 279 {"0123456789", "malformed octal constant", 1, 10}, 280 {"0123456789. /* foobar", "comment not terminated", 1, 12}, // valid float constant 281 {"0123456789e0 /*\nfoobar", "comment not terminated", 1, 13}, // valid float constant 282 {"var a, b = 08, 07\n", "malformed octal constant", 1, 13}, 283 {"(x + 1.0e+x)", "malformed floating-point constant exponent", 1, 10}, 284 285 {`''`, "empty character literal or unescaped ' in character literal", 1, 1}, 286 {"'\n", "newline in character literal", 1, 1}, 287 {`'\`, "missing '", 1, 2}, 288 {`'\'`, "missing '", 1, 3}, 289 {`'\x`, "missing '", 1, 3}, 290 {`'\x'`, "non-hex character in escape sequence: '", 1, 3}, 291 {`'\y'`, "unknown escape sequence", 1, 2}, 292 {`'\x0'`, "non-hex character in escape sequence: '", 1, 4}, 293 {`'\00'`, "non-octal character in escape sequence: '", 1, 4}, 294 {`'\377' /*`, "comment not terminated", 1, 7}, // valid octal escape 295 {`'\378`, "non-octal character in escape sequence: 8", 1, 4}, 296 {`'\400'`, "octal escape value > 255: 256", 1, 5}, 297 {`'xx`, "missing '", 1, 2}, 298 299 {"\"\n", "newline in string", 1, 1}, 300 {`"`, "string not terminated", 1, 0}, 301 {`"foo`, "string not terminated", 1, 0}, 302 {"`", "string not terminated", 1, 0}, 303 {"`foo", "string not terminated", 1, 0}, 304 {"/*/", "comment not terminated", 1, 0}, 305 {"/*\n\nfoo", "comment not terminated", 1, 0}, 306 {"/*\n\nfoo", "comment not terminated", 1, 0}, 307 {`"\`, "string not terminated", 1, 0}, 308 {`"\"`, "string not terminated", 1, 0}, 309 {`"\x`, "string not terminated", 1, 0}, 310 {`"\x"`, "non-hex character in escape sequence: \"", 1, 3}, 311 {`"\y"`, "unknown escape sequence", 1, 2}, 312 {`"\x0"`, "non-hex character in escape sequence: \"", 1, 4}, 313 {`"\00"`, "non-octal character in escape sequence: \"", 1, 4}, 314 {`"\377" /*`, "comment not terminated", 1, 7}, // valid octal escape 315 {`"\378"`, "non-octal character in escape sequence: 8", 1, 4}, 316 {`"\400"`, "octal escape value > 255: 256", 1, 5}, 317 318 {`s := "foo\z"`, "unknown escape sequence", 1, 10}, 319 {`s := "foo\z00\nbar"`, "unknown escape sequence", 1, 10}, 320 {`"\x`, "string not terminated", 1, 0}, 321 {`"\x"`, "non-hex character in escape sequence: \"", 1, 3}, 322 {`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 18}, 323 {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 18}, 324 325 // former problem cases 326 {"package p\n\n\xef", "invalid UTF-8 encoding", 3, 0}, 327 } { 328 var s scanner 329 nerrors := 0 330 s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) { 331 nerrors++ 332 // only check the first error 333 if nerrors == 1 { 334 if msg != test.msg { 335 t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg) 336 } 337 if line != test.line { 338 t.Errorf("%q: got line = %d; want %d", test.src, line, test.line) 339 } 340 if col != test.col { 341 t.Errorf("%q: got col = %d; want %d", test.src, col, test.col) 342 } 343 } else if nerrors > 1 { 344 // TODO(gri) make this use position info 345 t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line) 346 } 347 }, nil) 348 349 for { 350 s.next() 351 if s.tok == _EOF { 352 break 353 } 354 } 355 356 if nerrors == 0 { 357 t.Errorf("%q: got no error; want %q", test.src, test.msg) 358 } 359 } 360 }