github.com/XiaoMi/Gaea@v1.2.5/parser/lexer_test.go (about) 1 // Copyright 2016 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package parser 15 16 import ( 17 "fmt" 18 "unicode" 19 20 . "github.com/pingcap/check" 21 22 "github.com/XiaoMi/Gaea/mysql" 23 ) 24 25 var _ = Suite(&testLexerSuite{}) 26 27 type testLexerSuite struct { 28 } 29 30 func (s *testLexerSuite) TestTokenID(c *C) { 31 for str, tok := range tokenMap { 32 l := NewScanner(str) 33 var v yySymType 34 tok1 := l.Lex(&v) 35 c.Check(tok, Equals, tok1) 36 } 37 } 38 39 func (s *testLexerSuite) TestSingleChar(c *C) { 40 table := []byte{'|', '&', '-', '+', '*', '/', '%', '^', '~', '(', ',', ')'} 41 for _, tok := range table { 42 l := NewScanner(string(tok)) 43 var v yySymType 44 tok1 := l.Lex(&v) 45 c.Check(int(tok), Equals, tok1) 46 } 47 } 48 49 type testCaseItem struct { 50 str string 51 tok int 52 } 53 54 func (s *testLexerSuite) TestSingleCharOther(c *C) { 55 table := []testCaseItem{ 56 {"AT", identifier}, 57 {"?", paramMarker}, 58 {"PLACEHOLDER", identifier}, 59 {"=", eq}, 60 {".", int('.')}, 61 } 62 runTest(c, table) 63 } 64 65 func (s *testLexerSuite) TestAtLeadingIdentifier(c *C) { 66 table := []testCaseItem{ 67 {"@", singleAtIdentifier}, 68 {"@''", singleAtIdentifier}, 69 {"@1", singleAtIdentifier}, 70 {"@.1_", singleAtIdentifier}, 71 {"@-1.", singleAtIdentifier}, 72 {"@~", singleAtIdentifier}, 73 {"@$", singleAtIdentifier}, 74 {"@a_3cbbc", singleAtIdentifier}, 75 {"@`a_3cbbc`", singleAtIdentifier}, 76 {"@-3cbbc", singleAtIdentifier}, 77 {"@!3cbbc", singleAtIdentifier}, 78 {"@@global.test", doubleAtIdentifier}, 79 {"@@session.test", doubleAtIdentifier}, 80 {"@@local.test", doubleAtIdentifier}, 81 {"@@test", doubleAtIdentifier}, 82 {"@@global.`test`", doubleAtIdentifier}, 83 {"@@session.`test`", doubleAtIdentifier}, 84 {"@@local.`test`", doubleAtIdentifier}, 85 {"@@`test`", doubleAtIdentifier}, 86 } 87 runTest(c, table) 88 } 89 90 func (s *testLexerSuite) TestUnderscoreCS(c *C) { 91 var v yySymType 92 scanner := NewScanner(`_utf8"string"`) 93 tok := scanner.Lex(&v) 94 c.Check(tok, Equals, underscoreCS) 95 tok = scanner.Lex(&v) 96 c.Check(tok, Equals, stringLit) 97 98 scanner.reset("N'string'") 99 tok = scanner.Lex(&v) 100 c.Check(tok, Equals, underscoreCS) 101 tok = scanner.Lex(&v) 102 c.Check(tok, Equals, stringLit) 103 } 104 105 func (s *testLexerSuite) TestLiteral(c *C) { 106 table := []testCaseItem{ 107 {`'''a'''`, stringLit}, 108 {`''a''`, stringLit}, 109 {`""a""`, stringLit}, 110 {`\'a\'`, int('\\')}, 111 {`\"a\"`, int('\\')}, 112 {"0.2314", decLit}, 113 {"1234567890123456789012345678901234567890", decLit}, 114 {"132.313", decLit}, 115 {"132.3e231", floatLit}, 116 {"132.3e-231", floatLit}, 117 {"001e-12", floatLit}, 118 {"23416", intLit}, 119 {"123test", identifier}, 120 {"123" + string(unicode.ReplacementChar) + "xxx", identifier}, 121 {"0", intLit}, 122 {"0x3c26", hexLit}, 123 {"x'13181C76734725455A'", hexLit}, 124 {"0b01", bitLit}, 125 {fmt.Sprintf("t1%c", 0), identifier}, 126 {"N'some text'", underscoreCS}, 127 {"n'some text'", underscoreCS}, 128 {"\\N", null}, 129 {".*", int('.')}, // `.`, `*` 130 {".1_t_1_x", int('.')}, // `.`, `1_t_1_x` 131 {"9e9e", floatLit}, // 9e9e = 9e9 + e 132 // Issue #3954 133 {".1e23", floatLit}, // `.1e23` 134 {".123", decLit}, // `.123` 135 {".1*23", decLit}, // `.1`, `*`, `23` 136 {".1,23", decLit}, // `.1`, `,`, `23` 137 {".1 23", decLit}, // `.1`, `23` 138 // TODO: See #3963. The following test cases do not test the ambiguity. 139 {".1$23", int('.')}, // `.`, `1$23` 140 {".1a23", int('.')}, // `.`, `1a23` 141 {".1e23$23", int('.')}, // `.`, `1e23$23` 142 {".1e23a23", int('.')}, // `.`, `1e23a23` 143 {".1C23", int('.')}, // `.`, `1C23` 144 {".1\u0081", int('.')}, // `.`, `1\u0081` 145 {".1\uff34", int('.')}, // `.`, `1\uff34` 146 {`b''`, bitLit}, 147 {`b'0101'`, bitLit}, 148 {`0b0101`, bitLit}, 149 } 150 runTest(c, table) 151 } 152 153 func runTest(c *C, table []testCaseItem) { 154 var val yySymType 155 for _, v := range table { 156 l := NewScanner(v.str) 157 tok := l.Lex(&val) 158 c.Check(tok, Equals, v.tok, Commentf(v.str)) 159 } 160 } 161 162 func (s *testLexerSuite) TestComment(c *C) { 163 164 table := []testCaseItem{ 165 {"-- select --\n1", intLit}, 166 {"/*!40101 SET character_set_client = utf8 */;", set}, 167 {"/*+ BKA(t1) */", hintBegin}, 168 {"/* SET character_set_client = utf8 */;", int(';')}, 169 {"/* some comments */ SELECT ", selectKwd}, 170 {`-- comment continues to the end of line 171 SELECT`, selectKwd}, 172 {`# comment continues to the end of line 173 SELECT`, selectKwd}, 174 {"#comment\n123", intLit}, 175 {"--5", int('-')}, 176 {"--\nSELECT", selectKwd}, 177 {"--\tSELECT", 0}, 178 {"--\r\nSELECT", selectKwd}, 179 {"--", 0}, 180 } 181 runTest(c, table) 182 } 183 184 func (s *testLexerSuite) TestscanQuotedIdent(c *C) { 185 l := NewScanner("`fk`") 186 l.r.peek() 187 tok, pos, lit := scanQuotedIdent(l) 188 c.Assert(pos.Offset, Equals, 0) 189 c.Assert(tok, Equals, quotedIdentifier) 190 c.Assert(lit, Equals, "fk") 191 } 192 193 func (s *testLexerSuite) TestscanString(c *C) { 194 table := []struct { 195 raw string 196 expect string 197 }{ 198 {`' \n\tTest String'`, " \n\tTest String"}, 199 {`'\x\B'`, "xB"}, 200 {`'\0\'\"\b\n\r\t\\'`, "\000'\"\b\n\r\t\\"}, 201 {`'\Z'`, string(rune(26))}, 202 {`'\%\_'`, `\%\_`}, 203 {`'hello'`, "hello"}, 204 {`'"hello"'`, `"hello"`}, 205 {`'""hello""'`, `""hello""`}, 206 {`'hel''lo'`, "hel'lo"}, 207 {`'\'hello'`, "'hello"}, 208 {`"hello"`, "hello"}, 209 {`"'hello'"`, "'hello'"}, 210 {`"''hello''"`, "''hello''"}, 211 {`"hel""lo"`, `hel"lo`}, 212 {`"\"hello"`, `"hello`}, 213 {`'disappearing\ backslash'`, "disappearing backslash"}, 214 {"'한국의中文UTF8およびテキストトラック'", "한국의中文UTF8およびテキストトラック"}, 215 {"'\\a\x90'", "a\x90"}, 216 {`"\aèàø»"`, `aèàø»`}, 217 } 218 219 for _, v := range table { 220 l := NewScanner(v.raw) 221 tok, pos, lit := l.scan() 222 c.Assert(tok, Equals, stringLit) 223 c.Assert(pos.Offset, Equals, 0) 224 c.Assert(lit, Equals, v.expect) 225 } 226 } 227 228 func (s *testLexerSuite) TestIdentifier(c *C) { 229 replacementString := string(unicode.ReplacementChar) + "xxx" 230 table := [][2]string{ 231 {`哈哈`, "哈哈"}, 232 {"`numeric`", "numeric"}, 233 {"\r\n \r \n \tthere\t \n", "there"}, 234 {`5number`, `5number`}, 235 {"1_x", "1_x"}, 236 {"0_x", "0_x"}, 237 {replacementString, replacementString}, 238 {"9e", "9e"}, 239 {"0b", "0b"}, 240 {"0b123", "0b123"}, 241 {"0b1ab", "0b1ab"}, 242 {"0B01", "0B01"}, 243 {"0x", "0x"}, 244 {"0x7fz3", "0x7fz3"}, 245 {"023a4", "023a4"}, 246 {"9eTSs", "9eTSs"}, 247 {fmt.Sprintf("t1%cxxx", 0), "t1"}, 248 } 249 l := &Scanner{} 250 for _, item := range table { 251 l.reset(item[0]) 252 var v yySymType 253 tok := l.Lex(&v) 254 c.Assert(tok, Equals, identifier) 255 c.Assert(v.ident, Equals, item[1]) 256 } 257 } 258 259 func (s *testLexerSuite) TestSpecialComment(c *C) { 260 l := NewScanner("/*!40101 select\n5*/") 261 tok, pos, lit := l.scan() 262 c.Assert(tok, Equals, identifier) 263 c.Assert(lit, Equals, "select") 264 c.Assert(pos, Equals, Pos{0, 0, 9}) 265 266 tok, pos, lit = l.scan() 267 c.Assert(tok, Equals, intLit) 268 c.Assert(lit, Equals, "5") 269 c.Assert(pos, Equals, Pos{1, 1, 16}) 270 } 271 272 func (s *testLexerSuite) TestOptimizerHint(c *C) { 273 l := NewScanner(" /*+ BKA(t1) */") 274 tokens := []struct { 275 tok int 276 lit string 277 pos int 278 }{ 279 {hintBegin, "", 2}, 280 {identifier, "BKA", 6}, 281 {int('('), "(", 9}, 282 {identifier, "t1", 10}, 283 {int(')'), ")", 12}, 284 {hintEnd, "", 14}, 285 } 286 for i := 0; ; i++ { 287 tok, pos, lit := l.scan() 288 if tok == 0 { 289 return 290 } 291 c.Assert(tok, Equals, tokens[i].tok, Commentf("%d", i)) 292 c.Assert(lit, Equals, tokens[i].lit, Commentf("%d", i)) 293 c.Assert(pos.Offset, Equals, tokens[i].pos, Commentf("%d", i)) 294 } 295 } 296 297 func (s *testLexerSuite) TestInt(c *C) { 298 tests := []struct { 299 input string 300 expect uint64 301 }{ 302 {"01000001783", 1000001783}, 303 {"00001783", 1783}, 304 {"0", 0}, 305 {"0000", 0}, 306 {"01", 1}, 307 {"10", 10}, 308 } 309 scanner := NewScanner("") 310 for _, t := range tests { 311 var v yySymType 312 scanner.reset(t.input) 313 tok := scanner.Lex(&v) 314 c.Assert(tok, Equals, intLit) 315 switch i := v.item.(type) { 316 case int64: 317 c.Assert(uint64(i), Equals, t.expect) 318 case uint64: 319 c.Assert(i, Equals, t.expect) 320 default: 321 c.Fail() 322 } 323 } 324 } 325 326 func (s *testLexerSuite) TestSQLModeANSIQuotes(c *C) { 327 tests := []struct { 328 input string 329 tok int 330 ident string 331 }{ 332 {`"identifier"`, identifier, "identifier"}, 333 {"`identifier`", identifier, "identifier"}, 334 {`"identifier""and"`, identifier, `identifier"and`}, 335 {`'string''string'`, stringLit, "string'string"}, 336 {`"identifier"'and'`, identifier, "identifier"}, 337 {`'string'"identifier"`, stringLit, "string"}, 338 } 339 scanner := NewScanner("") 340 scanner.SetSQLMode(mysql.ModeANSIQuotes) 341 for _, t := range tests { 342 var v yySymType 343 scanner.reset(t.input) 344 tok := scanner.Lex(&v) 345 c.Assert(tok, Equals, t.tok) 346 c.Assert(v.ident, Equals, t.ident) 347 } 348 scanner.reset(`'string' 'string'`) 349 var v yySymType 350 tok := scanner.Lex(&v) 351 c.Assert(tok, Equals, stringLit) 352 c.Assert(v.ident, Equals, "string") 353 tok = scanner.Lex(&v) 354 c.Assert(tok, Equals, stringLit) 355 c.Assert(v.ident, Equals, "string") 356 } 357 358 func (s *testLexerSuite) TestIllegal(c *C) { 359 table := []testCaseItem{ 360 {"'", 0}, 361 {"'fu", 0}, 362 {"'\\n", 0}, 363 {"'\\", 0}, 364 {fmt.Sprintf("%c", 0), invalid}, 365 {"`", 0}, 366 {`"`, 0}, 367 {"@`", 0}, 368 {"@'", 0}, 369 {`@"`, 0}, 370 {"@@`", 0}, 371 {"@@global.`", 0}, 372 } 373 runTest(c, table) 374 }