github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/parser/lexer_test.go (about)

     1  // Copyright 2022 zGraph Authors. All rights reserved.
     2  //
     3  // Copyright 2016 PingCAP, Inc.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package parser
    17  
    18  import (
    19  	"fmt"
    20  	"testing"
    21  	"unicode"
    22  
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func TestTokenID(t *testing.T) {
    27  	for str, tok := range tokenMap {
    28  		l := NewLexer(str)
    29  		var v yySymType
    30  		tok1 := l.Lex(&v)
    31  		require.NotEqualf(t, invalid, tok1, "token '%s' should not be invalid", str)
    32  		require.Equalf(t, tok, tok1, "token '%s' not match", str)
    33  	}
    34  }
    35  
    36  func TestSingleChar(t *testing.T) {
    37  	table := []byte{'|', '&', '-', '+', '*', '/', '%', '^', '~', '(', ',', ')'}
    38  	for _, tok := range table {
    39  		l := NewLexer(string(tok))
    40  		var v yySymType
    41  		tok1 := l.Lex(&v)
    42  		require.Equal(t, tok1, int(tok))
    43  	}
    44  }
    45  
    46  type testCaseItem struct {
    47  	str string
    48  	tok int
    49  }
    50  
    51  type testLiteralValue struct {
    52  	str string
    53  	val interface{}
    54  }
    55  
    56  func TestSingleCharOther(t *testing.T) {
    57  	table := []testCaseItem{
    58  		{"AT", identifier},
    59  		{"?", paramMarker},
    60  		{"PLACEHOLDER", identifier},
    61  		{"=", eq},
    62  		{".", int('.')},
    63  	}
    64  	runTest(t, table)
    65  }
    66  
    67  func TestAtLeadingIdentifier(t *testing.T) {
    68  	table := []testCaseItem{
    69  		{"@", singleAtIdentifier},
    70  		{"@''", singleAtIdentifier},
    71  		{"@1", singleAtIdentifier},
    72  		{"@.1_", singleAtIdentifier},
    73  		{"@-1.", singleAtIdentifier},
    74  		{"@~", singleAtIdentifier},
    75  		{"@$", singleAtIdentifier},
    76  		{"@a_3cbbc", singleAtIdentifier},
    77  		{"@`a_3cbbc`", singleAtIdentifier},
    78  		{"@-3cbbc", singleAtIdentifier},
    79  		{"@!3cbbc", singleAtIdentifier},
    80  		{"@@global.test", doubleAtIdentifier},
    81  		{"@@session.test", doubleAtIdentifier},
    82  		{"@@local.test", doubleAtIdentifier},
    83  		{"@@test", doubleAtIdentifier},
    84  		{"@@global.`test`", doubleAtIdentifier},
    85  		{"@@session.`test`", doubleAtIdentifier},
    86  		{"@@local.`test`", doubleAtIdentifier},
    87  		{"@@`test`", doubleAtIdentifier},
    88  	}
    89  	runTest(t, table)
    90  }
    91  
    92  func TestLiteral(t *testing.T) {
    93  	table := []testCaseItem{
    94  		{`'''a'''`, stringLit},
    95  		{`''a''`, stringLit},
    96  		{`""a""`, stringLit},
    97  		{`\'a\'`, int('\\')},
    98  		{`\"a\"`, int('\\')},
    99  		{"0.2314", decLit},
   100  		{"1234567890123456789012345678901234567890", decLit},
   101  		{"132.313", decLit},
   102  		{"23416", intLit},
   103  		{"123test", identifier},
   104  		{"123" + string(unicode.ReplacementChar) + "xxx", identifier},
   105  		{"0", intLit},
   106  		{fmt.Sprintf("t1%c", 0), identifier},
   107  		{"\\N", null},
   108  		{".*", int('.')},     // `.`, `*`
   109  		{".1_t_1_x", decLit}, // `.1`, `_t_1_x`
   110  		{".1e", invalid},
   111  		{".123", decLit},     // `.123`
   112  		{".1*23", decLit},    // `.1`, `*`, `23`
   113  		{".1,23", decLit},    // `.1`, `,`, `23`
   114  		{".1 23", decLit},    // `.1`, `23`
   115  		{".1$23", decLit},    // `.1`, `$23`
   116  		{".1a23", decLit},    // `.1`, `a23`
   117  		{".1C23", decLit},    // `.1`, `C23`
   118  		{".1\u0081", decLit}, // `.1`, `\u0081`
   119  		{".1\uff34", decLit}, // `.1`, `\uff34`
   120  	}
   121  	runTest(t, table)
   122  }
   123  
   124  func TestLiteralValue(t *testing.T) {
   125  	t.Skip()
   126  	table := []testLiteralValue{
   127  		{`'''a'''`, `'a'`},
   128  		{`''a''`, ``},
   129  		{`""a""`, ``},
   130  		{`\'a\'`, `\`},
   131  		{`\"a\"`, `\`},
   132  		{"0.2314", "0.2314"},
   133  		{"1234567890123456789012345678901234567890", "1234567890123456789012345678901234567890"},
   134  		{"132.313", "132.313"},
   135  		{"132.3e231", 1.323e+233},
   136  		{"132.3e-231", 1.323e-229},
   137  		{"001e-12", 1e-12},
   138  		{"23416", int64(23416)},
   139  		{"123test", "123test"},
   140  		{"123" + string(unicode.ReplacementChar) + "xxx", "123" + string(unicode.ReplacementChar) + "xxx"},
   141  		{"0", int64(0)},
   142  		{"0x3c26", "[60 38]"},
   143  		{"x'13181C76734725455A'", "[19 24 28 118 115 71 37 69 90]"},
   144  		{"0b01", "[1]"},
   145  		{fmt.Sprintf("t1%c", 0), "t1"},
   146  		{"N'some text'", "utf8"},
   147  		{"n'some text'", "utf8"},
   148  		{"\\N", `\N`},
   149  		{".*", `.`},                   // `.`, `*`
   150  		{".1_t_1_x", "0.1"},           // `.1`, `_t_1_x`
   151  		{"9e9e", float64(9000000000)}, // 9e9e = 9e9 + e
   152  		{".1e", ""},
   153  		// Issue #3954
   154  		{".1e23", float64(10000000000000000000000)}, // `.1e23`
   155  		{".123", "0.123"}, // `.123`
   156  		{".1*23", "0.1"},  // `.1`, `*`, `23`
   157  		{".1,23", "0.1"},  // `.1`, `,`, `23`
   158  		{".1 23", "0.1"},  // `.1`, `23`
   159  		{".1$23", "0.1"},  // `.1`, `$23`
   160  		{".1a23", "0.1"},  // `.1`, `a23`
   161  		{".1e23$23", float64(10000000000000000000000)}, // `.1e23`, `$23`
   162  		{".1e23a23", float64(10000000000000000000000)}, // `.1e23`, `a23`
   163  		{".1C23", "0.1"},    // `.1`, `C23`
   164  		{".1\u0081", "0.1"}, // `.1`, `\u0081`
   165  		{".1\uff34", "0.1"}, // `.1`, `\uff34`
   166  		{`b''`, "[]"},
   167  		{`b'0101'`, "[5]"},
   168  		{`0b0101`, "[5]"},
   169  	}
   170  	runLiteralTest(t, table)
   171  }
   172  
   173  func runTest(t *testing.T, table []testCaseItem) {
   174  	var val yySymType
   175  	for _, v := range table {
   176  		l := NewLexer(v.str)
   177  		tok := l.Lex(&val)
   178  		require.Equal(t, v.tok, tok, v.str)
   179  	}
   180  }
   181  
   182  func runLiteralTest(t *testing.T, table []testLiteralValue) {
   183  	for _, v := range table {
   184  		l := NewLexer(v.str)
   185  		val := l.LexLiteral()
   186  		switch val.(type) {
   187  		case int64:
   188  			require.Equal(t, v.val, val, v.str)
   189  		case float64:
   190  			require.Equal(t, v.val, val, v.str)
   191  		case string:
   192  			require.Equal(t, v.val, val, v.str)
   193  		default:
   194  			require.Equal(t, v.val, fmt.Sprint(val), v.str)
   195  		}
   196  	}
   197  }
   198  
   199  func TestComment(t *testing.T) {
   200  	table := []testCaseItem{
   201  		//{"-- select --\n1", intLit},
   202  		{"/*!40101 SET character_set_client = utf8 */;", set},
   203  		{"/* SET character_set_client = utf8 */;", int(';')},
   204  		{"/* some comments */ SELECT ", selectKwd},
   205  		{`# comment continues to the end of line
   206  SELECT`, selectKwd},
   207  		{"#comment\n123", intLit},
   208  	}
   209  	runTest(t, table)
   210  }
   211  
   212  func TestScanQuotedIdent(t *testing.T) {
   213  	l := NewLexer("`fk`")
   214  	l.r.peek()
   215  	tok, pos, lit := scanQuotedIdent(l)
   216  	require.Zero(t, pos.Offset)
   217  	require.Equal(t, quotedIdentifier, tok)
   218  	require.Equal(t, "fk", lit)
   219  }
   220  
   221  func TestScanString(t *testing.T) {
   222  	table := []struct {
   223  		raw    string
   224  		expect string
   225  	}{
   226  		{`' \n\tTest String'`, " \n\tTest String"},
   227  		{`'\x\B'`, "xB"},
   228  		{`'\0\'\"\b\n\r\t\\'`, "\000'\"\b\n\r\t\\"},
   229  		{`'\Z'`, "\x1a"},
   230  		{`'\%\_'`, `\%\_`},
   231  		{`'hello'`, "hello"},
   232  		{`'"hello"'`, `"hello"`},
   233  		{`'""hello""'`, `""hello""`},
   234  		{`'hel''lo'`, "hel'lo"},
   235  		{`'\'hello'`, "'hello"},
   236  		{`"hello"`, "hello"},
   237  		{`"'hello'"`, "'hello'"},
   238  		{`"''hello''"`, "''hello''"},
   239  		{`"hel""lo"`, `hel"lo`},
   240  		{`"\"hello"`, `"hello`},
   241  		{`'disappearing\ backslash'`, "disappearing backslash"},
   242  		{"'한국의中文UTF8およびテキストトラック'", "한국의中文UTF8およびテキストトラック"},
   243  		{"'\\a\x90'", "a\x90"},
   244  		{"'\\a\x18èàø»\x05'", "a\x18èàø»\x05"},
   245  	}
   246  
   247  	for _, v := range table {
   248  		l := NewLexer(v.raw)
   249  		tok, pos, lit := l.scan()
   250  		require.Zero(t, pos.Offset)
   251  		require.Equal(t, stringLit, tok)
   252  		require.Equal(t, v.expect, lit)
   253  	}
   254  }
   255  
   256  func TestIdentifier(t *testing.T) {
   257  	table := [][2]string{
   258  		{`哈哈`, "哈哈"},
   259  		{"`numeric`", "numeric"},
   260  		{"\r\n \r \n \tthere\t \n", "there"},
   261  		{`5number`, `5number`},
   262  		{"1_x", "1_x"},
   263  		{"0_x", "0_x"},
   264  		{string(unicode.ReplacementChar) + "xxx", string(unicode.ReplacementChar) + "xxx"},
   265  		{"9e", "9e"},
   266  		{"0b", "0b"},
   267  		{"0b123", "0b123"},
   268  		{"0b1ab", "0b1ab"},
   269  		{"0B01", "0B01"},
   270  		{"0x", "0x"},
   271  		{"0x7fz3", "0x7fz3"},
   272  		{"023a4", "023a4"},
   273  		{"9eTSs", "9eTSs"},
   274  		{fmt.Sprintf("t1%cxxx", 0), "t1"},
   275  	}
   276  	l := &Lexer{}
   277  	for _, item := range table {
   278  		l.reset(item[0])
   279  		var v yySymType
   280  		tok := l.Lex(&v)
   281  		require.Equal(t, identifier, tok, item)
   282  		require.Equal(t, item[1], v.ident, item)
   283  	}
   284  }
   285  
   286  func TestInt(t *testing.T) {
   287  	tests := []struct {
   288  		input  string
   289  		expect uint64
   290  	}{
   291  		{"01000001783", 1000001783},
   292  		{"00001783", 1783},
   293  		{"0", 0},
   294  		{"0000", 0},
   295  		{"01", 1},
   296  		{"10", 10},
   297  	}
   298  	scanner := NewLexer("")
   299  	for _, test := range tests {
   300  		var v yySymType
   301  		scanner.reset(test.input)
   302  		tok := scanner.Lex(&v)
   303  		require.Equal(t, intLit, tok)
   304  		switch i := v.item.(type) {
   305  		case int64:
   306  			require.Equal(t, test.expect, uint64(i))
   307  		case uint64:
   308  			require.Equal(t, test.expect, i)
   309  		default:
   310  			t.Fail()
   311  		}
   312  	}
   313  }
   314  
   315  func TestIllegal(t *testing.T) {
   316  	table := []testCaseItem{
   317  		{"'", invalid},
   318  		{"'fu", invalid},
   319  		{"'\\n", invalid},
   320  		{"'\\", invalid},
   321  		{fmt.Sprintf("%c", 0), invalid},
   322  		{"`", invalid},
   323  		{`"`, invalid},
   324  		{"@`", invalid},
   325  		{"@'", invalid},
   326  		{`@"`, invalid},
   327  		{"@@`", invalid},
   328  		{"@@global.`", invalid},
   329  	}
   330  	runTest(t, table)
   331  }
   332  
   333  func TestVersionDigits(t *testing.T) {
   334  	tests := []struct {
   335  		input    string
   336  		min      int
   337  		max      int
   338  		nextChar byte
   339  	}{
   340  		{
   341  			input:    "12345",
   342  			min:      5,
   343  			max:      5,
   344  			nextChar: 0,
   345  		},
   346  		{
   347  			input:    "12345xyz",
   348  			min:      5,
   349  			max:      5,
   350  			nextChar: 'x',
   351  		},
   352  		{
   353  			input:    "1234xyz",
   354  			min:      5,
   355  			max:      5,
   356  			nextChar: '1',
   357  		},
   358  		{
   359  			input:    "123456",
   360  			min:      5,
   361  			max:      5,
   362  			nextChar: '6',
   363  		},
   364  		{
   365  			input:    "1234",
   366  			min:      5,
   367  			max:      5,
   368  			nextChar: '1',
   369  		},
   370  		{
   371  			input:    "",
   372  			min:      5,
   373  			max:      5,
   374  			nextChar: 0,
   375  		},
   376  		{
   377  			input:    "1234567xyz",
   378  			min:      5,
   379  			max:      6,
   380  			nextChar: '7',
   381  		},
   382  		{
   383  			input:    "12345xyz",
   384  			min:      5,
   385  			max:      6,
   386  			nextChar: 'x',
   387  		},
   388  		{
   389  			input:    "12345",
   390  			min:      5,
   391  			max:      6,
   392  			nextChar: 0,
   393  		},
   394  		{
   395  			input:    "1234xyz",
   396  			min:      5,
   397  			max:      6,
   398  			nextChar: '1',
   399  		},
   400  	}
   401  
   402  	scanner := NewLexer("")
   403  	for _, test := range tests {
   404  		scanner.reset(test.input)
   405  		scanner.scanVersionDigits(test.min, test.max)
   406  		nextChar := scanner.r.readByte()
   407  		require.Equalf(t, test.nextChar, nextChar, "input = %s", test.input)
   408  	}
   409  }