github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/parser/scan_test.go (about)

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package parser
    12  
    13  import (
    14  	"fmt"
    15  	"reflect"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/sql/lex"
    20  	"github.com/cockroachdb/cockroach/pkg/testutils"
    21  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    22  	"github.com/cockroachdb/errors"
    23  )
    24  
    25  func TestScanner(t *testing.T) {
    26  	testData := []struct {
    27  		sql      string
    28  		expected []int
    29  	}{
    30  		{``, nil},
    31  		{` `, nil},
    32  		{` /* hello */`, nil},
    33  		{`.`, []int{'.'}},
    34  		{`..`, []int{DOT_DOT}},
    35  		{`!`, []int{'!'}},
    36  		{`!=`, []int{NOT_EQUALS}},
    37  		{`<`, []int{'<'}},
    38  		{`<>`, []int{NOT_EQUALS}},
    39  		{`<=`, []int{LESS_EQUALS}},
    40  		{`<<`, []int{LSHIFT}},
    41  		{`<<=`, []int{INET_CONTAINED_BY_OR_EQUALS}},
    42  		{`>`, []int{'>'}},
    43  		{`>=`, []int{GREATER_EQUALS}},
    44  		{`>>`, []int{RSHIFT}},
    45  		{`>>=`, []int{INET_CONTAINS_OR_EQUALS}},
    46  		{`=`, []int{'='}},
    47  		{`:`, []int{':'}},
    48  		{`::`, []int{TYPECAST}},
    49  		{`:: :`, []int{TYPECAST, ':'}},
    50  		{`(`, []int{'('}},
    51  		{`)`, []int{')'}},
    52  		{`[`, []int{'['}},
    53  		{`]`, []int{']'}},
    54  		{`,`, []int{','}},
    55  		{`;`, []int{';'}},
    56  		{`+`, []int{'+'}},
    57  		{`-`, []int{'-'}},
    58  		{`*`, []int{'*'}},
    59  		{`/`, []int{'/'}},
    60  		{`//`, []int{FLOORDIV}},
    61  		{`%`, []int{'%'}},
    62  		{`^`, []int{'^'}},
    63  		{`$`, []int{'$'}},
    64  		{`&`, []int{'&'}},
    65  		{`&&`, []int{AND_AND}},
    66  		{`|`, []int{'|'}},
    67  		{`||`, []int{CONCAT}},
    68  		{`|/`, []int{SQRT}},
    69  		{`||/`, []int{CBRT}},
    70  		{`#`, []int{'#'}},
    71  		{`~`, []int{'~'}},
    72  		{`!~`, []int{NOT_REGMATCH}},
    73  		{`~*`, []int{REGIMATCH}},
    74  		{`!~*`, []int{NOT_REGIMATCH}},
    75  		{`$1`, []int{PLACEHOLDER}},
    76  		{`$a`, []int{'$', IDENT}},
    77  		{`a`, []int{IDENT}},
    78  		{`foo + bar`, []int{IDENT, '+', IDENT}},
    79  		{`select a from b`, []int{SELECT, IDENT, FROM, IDENT}},
    80  		{`"a" "b"`, []int{IDENT, IDENT}},
    81  		{`'a'`, []int{SCONST}},
    82  		{`$$a$$`, []int{SCONST}},
    83  		{`$a$b$a$`, []int{SCONST}},
    84  		{`$a$b b$a$`, []int{SCONST}},
    85  		{`$a$ $a$`, []int{SCONST}},
    86  		{`$a$1$b$2$b$3$a$`, []int{SCONST}},
    87  		{`$a$1$b$2$b3$a$`, []int{SCONST}},
    88  		{`$a$1$$3$a$`, []int{SCONST}},
    89  		{`$a$1$$3$a$`, []int{SCONST}},
    90  		{`$a$1$3$a$`, []int{SCONST}},
    91  		{`$ab$1$a$ab$`, []int{SCONST}},
    92  		{`$$~!@#$%^&*()_+:",./<>?;'$$`, []int{SCONST}},
    93  		{`$$hello
    94  world$$`, []int{SCONST}},
    95  		{`b'a'`, []int{BCONST}},
    96  		{`b'\xff'`, []int{BCONST}},
    97  		{`B'10101'`, []int{BITCONST}},
    98  		{`e'a'`, []int{SCONST}},
    99  		{`E'a'`, []int{SCONST}},
   100  		{`NOT`, []int{NOT}},
   101  		{`NOT BETWEEN`, []int{NOT, BETWEEN}},
   102  		{`NOT IN`, []int{NOT, IN}},
   103  		{`NOT SIMILAR`, []int{NOT, SIMILAR}},
   104  		{`WITH`, []int{WITH}},
   105  		{`WITH TIME`, []int{WITH, TIME}},
   106  		{`WITH ORDINALITY`, []int{WITH, ORDINALITY}},
   107  		{`1`, []int{ICONST}},
   108  		{`0xa`, []int{ICONST}},
   109  		{`x'2F'`, []int{BCONST}},
   110  		{`X'2F'`, []int{BCONST}},
   111  		{`1.0`, []int{FCONST}},
   112  		{`1.0e1`, []int{FCONST}},
   113  		{`1e+1`, []int{FCONST}},
   114  		{`1e-1`, []int{FCONST}},
   115  	}
   116  	for i, d := range testData {
   117  		s := makeScanner(d.sql)
   118  		var tokens []int
   119  		for {
   120  			var lval sqlSymType
   121  			s.scan(&lval)
   122  			if lval.id == 0 {
   123  				break
   124  			}
   125  			tokens = append(tokens, int(lval.id))
   126  		}
   127  
   128  		if !reflect.DeepEqual(d.expected, tokens) {
   129  			t.Errorf("%d: %q: expected %d, but found %d", i, d.sql, d.expected, tokens)
   130  		}
   131  	}
   132  }
   133  
   134  func TestScanComment(t *testing.T) {
   135  	testData := []struct {
   136  		sql       string
   137  		err       string
   138  		remainder string
   139  	}{
   140  		{`/* hello */world`, "", "world"},
   141  		{`/* hello */*`, "", "*"},
   142  		{`/* /* deeply /* nested */ comment */ */`, "", ""},
   143  		{`/* /* */* */`, "", ""},
   144  		{`/* /* /*/ */ */ */`, "", ""},
   145  		{`/* multi line
   146  comment */`, "", ""},
   147  		{`-- hello world
   148  foo`, "", "foo"},
   149  		{`/*`, "unterminated comment", ""},
   150  		{`/*/`, "unterminated comment", ""},
   151  		{`/* /* */`, "unterminated comment", ""},
   152  	}
   153  	for i, d := range testData {
   154  		s := makeScanner(d.sql)
   155  		var lval sqlSymType
   156  		present, ok := s.scanComment(&lval)
   157  		if d.err == "" && (!present || !ok) {
   158  			t.Fatalf("%d: expected success, but found %s", i, lval.str)
   159  		} else if d.err != "" && (present || ok || d.err != lval.str) {
   160  			t.Fatalf("%d: expected %s, but found %s", i, d.err, lval.str)
   161  		}
   162  		if r := s.in[s.pos:]; d.remainder != r {
   163  			t.Fatalf("%d: expected '%s', but found '%s'", i, d.remainder, r)
   164  		}
   165  	}
   166  }
   167  
   168  func TestScanKeyword(t *testing.T) {
   169  	for _, kwName := range lex.KeywordNames {
   170  		s := makeScanner(kwName)
   171  		var lval sqlSymType
   172  		s.scan(&lval)
   173  		if id := lex.GetKeywordID(kwName); id != lval.id {
   174  			t.Errorf("%s: expected %d, but found %d", kwName, id, lval.id)
   175  		}
   176  	}
   177  }
   178  
   179  func TestScanNumber(t *testing.T) {
   180  	testData := []struct {
   181  		sql      string
   182  		expected string
   183  		id       int
   184  	}{
   185  		{`0`, `0`, ICONST},
   186  		{`000`, `0`, ICONST},
   187  		{`1`, `1`, ICONST},
   188  		{`0x1`, `0x1`, ICONST},
   189  		{`0X2`, `0X2`, ICONST},
   190  		{`0xff`, `0xff`, ICONST},
   191  		{`0xff.`, `0xff`, ICONST},
   192  		{`12345`, `12345`, ICONST},
   193  		{`08`, `8`, ICONST},
   194  		{`0011`, `11`, ICONST},
   195  		{`1.`, `1.`, FCONST},
   196  		{`.1`, `.1`, FCONST},
   197  		{`1..2`, `1`, ICONST},
   198  		{`1.2`, `1.2`, FCONST},
   199  		{`1.2e3`, `1.2e3`, FCONST},
   200  		{`1e3`, `1e3`, FCONST},
   201  		{`1e3.4`, `1e3`, FCONST},
   202  		{`.1e3.4`, `.1e3`, FCONST},
   203  		{`1e-3`, `1e-3`, FCONST},
   204  		{`1e-3-`, `1e-3`, FCONST},
   205  		{`1e+3`, `1e+3`, FCONST},
   206  		{`1e+3+`, `1e+3`, FCONST},
   207  		{`9223372036854775809`, `9223372036854775809`, ICONST},
   208  	}
   209  	for _, d := range testData {
   210  		s := makeScanner(d.sql)
   211  		var lval sqlSymType
   212  		s.scan(&lval)
   213  		if d.id != int(lval.id) {
   214  			t.Errorf("%s: expected %d, but found %d", d.sql, d.id, lval.id)
   215  		}
   216  		if d.expected != lval.str {
   217  			t.Errorf("%s: expected %s, but found %s", d.sql, d.expected, lval.str)
   218  		}
   219  	}
   220  }
   221  
   222  func TestScanPlaceholder(t *testing.T) {
   223  	testData := []struct {
   224  		sql      string
   225  		expected string
   226  	}{
   227  		{`$1`, "1"},
   228  		{`$1a`, "1"},
   229  		{`$123`, "123"},
   230  	}
   231  	for _, d := range testData {
   232  		s := makeScanner(d.sql)
   233  		var lval sqlSymType
   234  		s.scan(&lval)
   235  		if lval.id != PLACEHOLDER {
   236  			t.Errorf("%s: expected %d, but found %d", d.sql, PLACEHOLDER, lval.id)
   237  		}
   238  		if d.expected != lval.str {
   239  			t.Errorf("%s: expected %s, but found %s", d.sql, d.expected, lval.str)
   240  		}
   241  	}
   242  }
   243  
   244  func TestScanString(t *testing.T) {
   245  	testData := []struct {
   246  		sql      string
   247  		expected string
   248  	}{
   249  		{`"a"`, `a`},
   250  		{`'a'`, `a`},
   251  		{`"a""b"`, `a"b`},
   252  		{`"a''b"`, `a''b`},
   253  		{`'a""b'`, `a""b`},
   254  		{`'a''b'`, `a'b`},
   255  		{`"a" "b"`, `a`},
   256  		{`'a' 'b'`, `a`},
   257  		{`'\n'`, `\n`},
   258  		{`e'\n'`, "\n"},
   259  		{`'\\n'`, `\\n`},
   260  		{`'\'''`, `\'`},
   261  		{`'\0\'`, `\0\`},
   262  		{`"a"
   263  	"b"`, `ab`},
   264  		{`"a"
   265  	'b'`, `a`},
   266  		{`'a'
   267  	'b'`, `ab`},
   268  		{`'a'
   269  	"b"`, `a`},
   270  		{`e'\"'`, `"`}, // redundant escape
   271  		{"'\n\\'", "\n\\"},
   272  		{`e'foo\"\'\\\a\b\f\n\r\t\vbar'`,
   273  			strings.Join([]string{`foo"'\`, "\a\b\f\n\r\t\v", `bar`}, "")},
   274  		{`e'\\0'`, `\0`},
   275  		{`'\0'`, `\0`},
   276  		{`e'\x'`, `invalid syntax`},
   277  		{`e'\x4'`, `invalid syntax`},
   278  		{`e'\xg'`, `invalid syntax`},
   279  		{`e'\X4'`, `invalid syntax`},
   280  		{`e'\x41'`, `A`},
   281  		{`e'\X41B'`, `AB`},
   282  		{`e'\0'`, `invalid syntax`},
   283  		{`e'\00'`, `invalid syntax`},
   284  		{`e'\009'`, `invalid syntax`},
   285  		{`e'\101'`, `A`},
   286  		{`e'\101B'`, `AB`},
   287  		{`e'\xff'`, `invalid UTF-8 byte sequence`},
   288  		{`e'\u1'`, `invalid syntax`},
   289  		{`e'\U123'`, `invalid syntax`},
   290  		{`e'\u0041'`, `A`},
   291  		{`e'\u0041B'`, `AB`},
   292  		{`e'\U00000041'`, `A`},
   293  		{`e'\U00000041B'`, `AB`},
   294  		{`"''"`, `''`},
   295  		{`'""'''`, `""'`},
   296  		{`""""`, `"`},
   297  		{`''''`, `'`},
   298  		{`''''''`, `''`},
   299  		{`'hello
   300  world'`, `hello
   301  world`},
   302  		{`x'666f6f'`, `foo`},
   303  		{`X'626172'`, `bar`},
   304  		{`X'FF'`, "\xff"},
   305  		{`B'100101'`, "100101"},
   306  		{`$$a$$`, "a"},
   307  		{`$a$b$a$`, "b"},
   308  		{`$a$b b$a$`, "b b"},
   309  		{`$a$ $a$`, " "},
   310  		{`$a$1$b$2$b$3$a$`, "1$b$2$b$3"},
   311  		{`$a$1$b$2$b3$a$`, "1$b$2$b3"},
   312  		{`$a$1$$3$a$`, "1$$3"},
   313  		{`$a$1$3$a$`, "1$3"},
   314  		{`$ab$1$a$ab$`, "1$a"},
   315  		{`$$~!@#$%^&*()_+:",./<>?;'$$`, "~!@#$%^&*()_+:\",./<>?;'"},
   316  		{`$$hello
   317  world$$`, `hello
   318  world`},
   319  		{`$$a`, `unterminated string`},
   320  		{`$a$a$$`, `unterminated string`},
   321  	}
   322  	for _, d := range testData {
   323  		s := makeScanner(d.sql)
   324  		var lval sqlSymType
   325  		s.scan(&lval)
   326  		if d.expected != lval.str {
   327  			t.Errorf("%s: expected %q, but found %q", d.sql, d.expected, lval.str)
   328  		}
   329  	}
   330  }
   331  
   332  func TestScanError(t *testing.T) {
   333  	testData := []struct {
   334  		sql string
   335  		err string
   336  	}{
   337  		{`1e`, "invalid floating point literal"},
   338  		{`1e-`, "invalid floating point literal"},
   339  		{`1e+`, "invalid floating point literal"},
   340  		{`0x`, "invalid hexadecimal numeric literal"},
   341  		{`1x`, "invalid hexadecimal numeric literal"},
   342  		{`1.x`, "invalid hexadecimal numeric literal"},
   343  		{`1.0x`, "invalid hexadecimal numeric literal"},
   344  		{`0x0x`, "invalid hexadecimal numeric literal"},
   345  		{`00x0x`, "invalid hexadecimal numeric literal"},
   346  		{`x'zzz'`, "invalid hexadecimal bytes literal"},
   347  		{`X'zzz'`, "invalid hexadecimal bytes literal"},
   348  		{`x'beef\x41'`, "invalid hexadecimal bytes literal"},
   349  		{`X'beef\x41\x41'`, "invalid hexadecimal bytes literal"},
   350  		{`x'a'`, "invalid hexadecimal bytes literal"},
   351  		{`$0`, "placeholder index must be between 1 and 65536"},
   352  		{`$9223372036854775809`, "placeholder index must be between 1 and 65536"},
   353  		{`B'123'`, `"2" is not a valid binary digit`},
   354  	}
   355  	for _, d := range testData {
   356  		s := makeScanner(d.sql)
   357  		var lval sqlSymType
   358  		s.scan(&lval)
   359  		if lval.id != ERROR {
   360  			t.Errorf("%s: expected ERROR, but found %d", d.sql, lval.id)
   361  		}
   362  		if !testutils.IsError(errors.Newf("%s", lval.str), d.err) {
   363  			t.Errorf("%s: expected %s, but found %v", d.sql, d.err, lval.str)
   364  		}
   365  	}
   366  }
   367  
   368  func TestSplitFirstStatement(t *testing.T) {
   369  	defer leaktest.AfterTest(t)()
   370  	tests := []struct {
   371  		s, res string
   372  	}{
   373  		{
   374  			s:   "SELECT 1",
   375  			res: "",
   376  		},
   377  		{
   378  			s:   "SELECT 1;",
   379  			res: "SELECT 1;",
   380  		},
   381  		{
   382  			s:   "SELECT 1  /* comment */ ;",
   383  			res: "SELECT 1  /* comment */ ;",
   384  		},
   385  		{
   386  			s:   "SELECT 1;SELECT 2",
   387  			res: "SELECT 1;",
   388  		},
   389  		{
   390  			s:   "SELECT 1  /* comment */ ;SELECT 2",
   391  			res: "SELECT 1  /* comment */ ;",
   392  		},
   393  		{
   394  			s:   "SELECT 1  /* comment */ ; /* comment */ SELECT 2",
   395  			res: "SELECT 1  /* comment */ ;",
   396  		},
   397  		{
   398  			s:   ";",
   399  			res: ";",
   400  		},
   401  		{
   402  			s:   "SELECT ';'",
   403  			res: "",
   404  		},
   405  	}
   406  
   407  	for i, tc := range tests {
   408  		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
   409  			pos, ok := SplitFirstStatement(tc.s)
   410  			if !ok && pos != 0 {
   411  				t.Fatalf("!ok but nonzero pos")
   412  			}
   413  			if tc.res != tc.s[:pos] {
   414  				t.Errorf("expected `%s` but got `%s`", tc.res, tc.s[:pos])
   415  			}
   416  		})
   417  	}
   418  }
   419  
   420  func TestLastLexicalToken(t *testing.T) {
   421  	defer leaktest.AfterTest(t)()
   422  	tests := []struct {
   423  		s   string
   424  		res int
   425  	}{
   426  		{
   427  			s:   "",
   428  			res: 0,
   429  		},
   430  		{
   431  			s:   " /* comment */ ",
   432  			res: 0,
   433  		},
   434  		{
   435  			s:   "SELECT",
   436  			res: SELECT,
   437  		},
   438  		{
   439  			s:   "SELECT 1",
   440  			res: ICONST,
   441  		},
   442  		{
   443  			s:   "SELECT 1;",
   444  			res: ';',
   445  		},
   446  		{
   447  			s:   "SELECT 1; /* comment */",
   448  			res: ';',
   449  		},
   450  		{
   451  			s: `SELECT 1;
   452  			    -- comment`,
   453  			res: ';',
   454  		},
   455  		{
   456  			s: `
   457  				--SELECT 1, 2, 3;
   458  				SELECT 4, 5
   459  				--blah`,
   460  			res: ICONST,
   461  		},
   462  		{
   463  			s: `
   464  				--SELECT 1, 2, 3;
   465  				SELECT 4, 5;
   466  				--blah`,
   467  			res: ';',
   468  		},
   469  		{
   470  			s:   `SELECT 'unfinished`,
   471  			res: ERROR,
   472  		},
   473  		{
   474  			s:   `SELECT e'\xaa';`, // invalid token but last token is semicolon
   475  			res: ';',
   476  		},
   477  	}
   478  
   479  	for i, tc := range tests {
   480  		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
   481  			tok, ok := LastLexicalToken(tc.s)
   482  			if !ok && tok != 0 {
   483  				t.Fatalf("!ok but nonzero tok")
   484  			}
   485  			if tc.res != tok {
   486  				t.Errorf("expected %d but got %d", tc.res, tok)
   487  			}
   488  		})
   489  	}
   490  }