github.com/XiaoMi/Gaea@v1.2.5/parser/lexer_test.go

// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package parser

import (
	"fmt"
	"unicode"

	. "github.com/pingcap/check"

	"github.com/XiaoMi/Gaea/mysql"
)

var _ = Suite(&testLexerSuite{})

type testLexerSuite struct {
}

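// TestTokenID checks that every keyword string in tokenMap is scanned back to its mapped token id.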
func (s *testLexerSuite) TestTokenID(c *C) {
	for str, tok := range tokenMap {
		l := NewScanner(str)
		var v yySymType
		tok1 := l.Lex(&v)
		c.Check(tok, Equals, tok1)
	}
}

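// TestSingleChar checks that single-character operators are returned as their own byte values.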
func (s *testLexerSuite) TestSingleChar(c *C) {
	table := []byte{'|', '&', '-', '+', '*', '/', '%', '^', '~', '(', ',', ')'}
	for _, tok := range table {
		l := NewScanner(string(tok))
		var v yySymType
		tok1 := l.Lex(&v)
		c.Check(int(tok), Equals, tok1)
	}
}

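// testCaseItem pairs an input string with the token expected for its first lexeme.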
type testCaseItem struct {
	str string
	tok int
}

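// TestSingleCharOther covers single tokens that do not lex to their literal byte value, such as '?' and '='.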
func (s *testLexerSuite) TestSingleCharOther(c *C) {
	table := []testCaseItem{
		{"AT", identifier},
		{"?", paramMarker},
		{"PLACEHOLDER", identifier},
		{"=", eq},
		{".", int('.')},
	}
	runTest(c, table)
}

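// TestAtLeadingIdentifier checks @-prefixed user variables and @@-prefixed system variables.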
func (s *testLexerSuite) TestAtLeadingIdentifier(c *C) {
	table := []testCaseItem{
		{"@", singleAtIdentifier},
		{"@''", singleAtIdentifier},
		{"@1", singleAtIdentifier},
		{"@.1_", singleAtIdentifier},
		{"@-1.", singleAtIdentifier},
		{"@~", singleAtIdentifier},
		{"@$", singleAtIdentifier},
		{"@a_3cbbc", singleAtIdentifier},
		{"@`a_3cbbc`", singleAtIdentifier},
		{"@-3cbbc", singleAtIdentifier},
		{"@!3cbbc", singleAtIdentifier},
		{"@@global.test", doubleAtIdentifier},
		{"@@session.test", doubleAtIdentifier},
		{"@@local.test", doubleAtIdentifier},
		{"@@test", doubleAtIdentifier},
		{"@@global.`test`", doubleAtIdentifier},
		{"@@session.`test`", doubleAtIdentifier},
		{"@@local.`test`", doubleAtIdentifier},
		{"@@`test`", doubleAtIdentifier},
	}
	runTest(c, table)
}

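// TestUnderscoreCS checks the _utf8 character-set introducer and the national string prefix N'...', each followed by a string literal.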
func (s *testLexerSuite) TestUnderscoreCS(c *C) {
	var v yySymType
	scanner := NewScanner(`_utf8"string"`)
	tok := scanner.Lex(&v)
	c.Check(tok, Equals, underscoreCS)
	tok = scanner.Lex(&v)
	c.Check(tok, Equals, stringLit)

	scanner.reset("N'string'")
	tok = scanner.Lex(&v)
	c.Check(tok, Equals, underscoreCS)
	tok = scanner.Lex(&v)
	c.Check(tok, Equals, stringLit)
}

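// TestLiteral checks the token types produced for string, numeric, hexadecimal, and bit literals.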
func (s *testLexerSuite) TestLiteral(c *C) {
	table := []testCaseItem{
		{`'''a'''`, stringLit},
		{`''a''`, stringLit},
		{`""a""`, stringLit},
		{`\'a\'`, int('\\')},
		{`\"a\"`, int('\\')},
		{"0.2314", decLit},
		{"1234567890123456789012345678901234567890", decLit},
		{"132.313", decLit},
		{"132.3e231", floatLit},
		{"132.3e-231", floatLit},
		{"001e-12", floatLit},
		{"23416", intLit},
		{"123test", identifier},
		{"123" + string(unicode.ReplacementChar) + "xxx", identifier},
		{"0", intLit},
		{"0x3c26", hexLit},
		{"x'13181C76734725455A'", hexLit},
		{"0b01", bitLit},
		{fmt.Sprintf("t1%c", 0), identifier},
		{"N'some text'", underscoreCS},
		{"n'some text'", underscoreCS},
		{"\\N", null},
		{".*", int('.')},       // `.`, `*`
		{".1_t_1_x", int('.')}, // `.`, `1_t_1_x`
		{"9e9e", floatLit},     // `9e9`, `e`
		// Issue #3954
		{".1e23", floatLit}, // `.1e23`
		{".123", decLit},    // `.123`
		{".1*23", decLit},   // `.1`, `*`, `23`
		{".1,23", decLit},   // `.1`, `,`, `23`
		{".1 23", decLit},   // `.1`, `23`
		// TODO: See #3963. The following test cases do not test the ambiguity.
		{".1$23", int('.')},    // `.`, `1$23`
		{".1a23", int('.')},    // `.`, `1a23`
		{".1e23$23", int('.')}, // `.`, `1e23$23`
		{".1e23a23", int('.')}, // `.`, `1e23a23`
		{".1C23", int('.')},    // `.`, `1C23`
		{".1\u0081", int('.')}, // `.`, `1\u0081`
		{".1\uff34", int('.')}, // `.`, `1\uff34`
		{`b''`, bitLit},
		{`b'0101'`, bitLit},
		{`0b0101`, bitLit},
	}
	runTest(c, table)
}

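// runTest scans the first token of each input and compares it with the expected token.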
func runTest(c *C, table []testCaseItem) {
	var val yySymType
	for _, v := range table {
		l := NewScanner(v.str)
		tok := l.Lex(&val)
		c.Check(tok, Equals, v.tok, Commentf(v.str))
	}
}

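// TestComment checks that plain comments are skipped while version comments and optimizer hints are tokenized.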
func (s *testLexerSuite) TestComment(c *C) {
	table := []testCaseItem{
		{"-- select --\n1", intLit},
		{"/*!40101 SET character_set_client = utf8 */;", set},
		{"/*+ BKA(t1) */", hintBegin},
		{"/* SET character_set_client = utf8 */;", int(';')},
		{"/* some comments */ SELECT ", selectKwd},
		{`-- comment continues to the end of line
SELECT`, selectKwd},
		{`# comment continues to the end of line
SELECT`, selectKwd},
		{"#comment\n123", intLit},
		{"--5", int('-')},
		{"--\nSELECT", selectKwd},
		{"--\tSELECT", 0},
		{"--\r\nSELECT", selectKwd},
		{"--", 0},
	}
	runTest(c, table)
}

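// TestscanQuotedIdent checks scanning of a backquoted identifier.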
func (s *testLexerSuite) TestscanQuotedIdent(c *C) {
	l := NewScanner("`fk`")
	l.r.peek()
	tok, pos, lit := scanQuotedIdent(l)
	c.Assert(pos.Offset, Equals, 0)
	c.Assert(tok, Equals, quotedIdentifier)
	c.Assert(lit, Equals, "fk")
}

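// TestscanString checks escape-sequence handling inside single- and double-quoted string literals.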
func (s *testLexerSuite) TestscanString(c *C) {
	table := []struct {
		raw    string
		expect string
	}{
		{`' \n\tTest String'`, " \n\tTest String"},
		{`'\x\B'`, "xB"},
		{`'\0\'\"\b\n\r\t\\'`, "\000'\"\b\n\r\t\\"},
		{`'\Z'`, string(rune(26))},
		{`'\%\_'`, `\%\_`},
		{`'hello'`, "hello"},
		{`'"hello"'`, `"hello"`},
		{`'""hello""'`, `""hello""`},
		{`'hel''lo'`, "hel'lo"},
		{`'\'hello'`, "'hello"},
		{`"hello"`, "hello"},
		{`"'hello'"`, "'hello'"},
		{`"''hello''"`, "''hello''"},
		{`"hel""lo"`, `hel"lo`},
		{`"\"hello"`, `"hello`},
		{`'disappearing\ backslash'`, "disappearing backslash"},
		{"'한국의中文UTF8およびテキストトラック'", "한국의中文UTF8およびテキストトラック"},
		{"'\\a\x90'", "a\x90"},
		{`"\aèàø»"`, `aèàø»`},
	}

	for _, v := range table {
		l := NewScanner(v.raw)
		tok, pos, lit := l.scan()
		c.Assert(tok, Equals, stringLit)
		c.Assert(pos.Offset, Equals, 0)
		c.Assert(lit, Equals, v.expect)
	}
}

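// TestIdentifier checks inputs that scan as identifiers and the literal value returned for each.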
func (s *testLexerSuite) TestIdentifier(c *C) {
	replacementString := string(unicode.ReplacementChar) + "xxx"
	table := [][2]string{
		{`哈哈`, "哈哈"},
		{"`numeric`", "numeric"},
		{"\r\n \r \n \tthere\t \n", "there"},
		{`5number`, `5number`},
		{"1_x", "1_x"},
		{"0_x", "0_x"},
		{replacementString, replacementString},
		{"9e", "9e"},
		{"0b", "0b"},
		{"0b123", "0b123"},
		{"0b1ab", "0b1ab"},
		{"0B01", "0B01"},
		{"0x", "0x"},
		{"0x7fz3", "0x7fz3"},
		{"023a4", "023a4"},
		{"9eTSs", "9eTSs"},
		{fmt.Sprintf("t1%cxxx", 0), "t1"},
	}
	l := &Scanner{}
	for _, item := range table {
		l.reset(item[0])
		var v yySymType
		tok := l.Lex(&v)
		c.Assert(tok, Equals, identifier)
		c.Assert(v.ident, Equals, item[1])
	}
}

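// TestSpecialComment checks that tokens inside a /*!40101 ... */ version comment are scanned with the expected positions.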
func (s *testLexerSuite) TestSpecialComment(c *C) {
	l := NewScanner("/*!40101 select\n5*/")
	tok, pos, lit := l.scan()
	c.Assert(tok, Equals, identifier)
	c.Assert(lit, Equals, "select")
	c.Assert(pos, Equals, Pos{0, 0, 9})

	tok, pos, lit = l.scan()
	c.Assert(tok, Equals, intLit)
	c.Assert(lit, Equals, "5")
	c.Assert(pos, Equals, Pos{1, 1, 16})
}

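// TestOptimizerHint checks the full token stream produced for an optimizer hint comment.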
func (s *testLexerSuite) TestOptimizerHint(c *C) {
	l := NewScanner("  /*+ BKA(t1) */")
	tokens := []struct {
		tok int
		lit string
		pos int
	}{
		{hintBegin, "", 2},
		{identifier, "BKA", 6},
		{int('('), "(", 9},
		{identifier, "t1", 10},
		{int(')'), ")", 12},
		{hintEnd, "", 14},
	}
	for i := 0; ; i++ {
		tok, pos, lit := l.scan()
		if tok == 0 {
			return
		}
		c.Assert(tok, Equals, tokens[i].tok, Commentf("%d", i))
		c.Assert(lit, Equals, tokens[i].lit, Commentf("%d", i))
		c.Assert(pos.Offset, Equals, tokens[i].pos, Commentf("%d", i))
	}
}

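// TestInt checks that integer literals, including those with leading zeros, parse to the expected value.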
func (s *testLexerSuite) TestInt(c *C) {
	tests := []struct {
		input  string
		expect uint64
	}{
		{"01000001783", 1000001783},
		{"00001783", 1783},
		{"0", 0},
		{"0000", 0},
		{"01", 1},
		{"10", 10},
	}
	scanner := NewScanner("")
	for _, t := range tests {
		var v yySymType
		scanner.reset(t.input)
		tok := scanner.Lex(&v)
		c.Assert(tok, Equals, intLit)
		switch i := v.item.(type) {
		case int64:
			c.Assert(uint64(i), Equals, t.expect)
		case uint64:
			c.Assert(i, Equals, t.expect)
		default:
			c.Fail()
		}
	}
}

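// TestSQLModeANSIQuotes checks that double-quoted strings scan as identifiers when ANSI_QUOTES SQL mode is set.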
func (s *testLexerSuite) TestSQLModeANSIQuotes(c *C) {
	tests := []struct {
		input string
		tok   int
		ident string
	}{
		{`"identifier"`, identifier, "identifier"},
		{"`identifier`", identifier, "identifier"},
		{`"identifier""and"`, identifier, `identifier"and`},
		{`'string''string'`, stringLit, "string'string"},
		{`"identifier"'and'`, identifier, "identifier"},
		{`'string'"identifier"`, stringLit, "string"},
	}
	scanner := NewScanner("")
	scanner.SetSQLMode(mysql.ModeANSIQuotes)
	for _, t := range tests {
		var v yySymType
		scanner.reset(t.input)
		tok := scanner.Lex(&v)
		c.Assert(tok, Equals, t.tok)
		c.Assert(v.ident, Equals, t.ident)
	}
	scanner.reset(`'string' 'string'`)
	var v yySymType
	tok := scanner.Lex(&v)
	c.Assert(tok, Equals, stringLit)
	c.Assert(v.ident, Equals, "string")
	tok = scanner.Lex(&v)
	c.Assert(tok, Equals, stringLit)
	c.Assert(v.ident, Equals, "string")
}

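// TestIllegal checks inputs that cannot be tokenized, such as unterminated quotes and a NUL byte.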
func (s *testLexerSuite) TestIllegal(c *C) {
	table := []testCaseItem{
		{"'", 0},
		{"'fu", 0},
		{"'\\n", 0},
		{"'\\", 0},
		{fmt.Sprintf("%c", 0), invalid},
		{"`", 0},
		{`"`, 0},
		{"@`", 0},
		{"@'", 0},
		{`@"`, 0},
		{"@@`", 0},
		{"@@global.`", 0},
	}
	runTest(c, table)
}