github.com/mithrandie/csvq@v1.18.1/lib/parser/scanner_test.go (about)

     1  package parser
     2  
     3  import (
     4  	"testing"
     5  )
     6  
     7  type scanResult struct {
     8  	Token         int
     9  	Literal       string
    10  	Quoted        bool
    11  	HolderOrdinal int
    12  	Line          int
    13  	Char          int
    14  }
    15  
    16  var scanTests = []struct {
    17  	Name        string
    18  	Input       string
    19  	ForPrepared bool
    20  	AnsiQuotes  bool
    21  	Output      []scanResult
    22  	Error       string
    23  }{
    24  	{
    25  		Name:  "Identifier",
    26  		Input: "identifier",
    27  		Output: []scanResult{
    28  			{
    29  				Token:   IDENTIFIER,
    30  				Literal: "identifier",
    31  			},
    32  		},
    33  	},
    34  	{
    35  		Name:  "QuotedIdentifier",
    36  		Input: "`id\\enti\\`fier```",
    37  		Output: []scanResult{
    38  			{
    39  				Token:   IDENTIFIER,
    40  				Literal: "id\\enti`fier`",
    41  				Quoted:  true,
    42  			},
    43  		},
    44  	},
    45  	{
    46  		Name:  "QuotedString",
    47  		Input: "\"string\\\"\"",
    48  		Output: []scanResult{
    49  			{
    50  				Token:   STRING,
    51  				Literal: "string\"",
    52  			},
    53  		},
    54  	},
    55  	{
    56  		Name:  "QuotedString 2",
    57  		Input: "\"string\\\\\"",
    58  		Output: []scanResult{
    59  			{
    60  				Token:   STRING,
    61  				Literal: "string\\",
    62  			},
    63  		},
    64  	},
    65  	{
    66  		Name:  "QuotedString(Single-Quote)",
    67  		Input: "'strin\\'g string'",
    68  		Output: []scanResult{
    69  			{
    70  				Token:   STRING,
    71  				Literal: "strin'g string",
    72  			},
    73  		},
    74  	},
    75  	{
    76  		Name:  "QuotedString Escape Mark",
    77  		Input: "\"string\\t\"",
    78  		Output: []scanResult{
    79  			{
    80  				Token:   STRING,
    81  				Literal: "string\t",
    82  			},
    83  		},
    84  	},
    85  	{
    86  		Name:  "QuotedString Double Escape Mark",
    87  		Input: "\"string\\\\t\"",
    88  		Output: []scanResult{
    89  			{
    90  				Token:   STRING,
    91  				Literal: "string\\t",
    92  			},
    93  		},
    94  	},
    95  	{
    96  		Name:  "QuotedString Double Quotation Mark",
    97  		Input: "\"string\"\"string\"",
    98  		Output: []scanResult{
    99  			{
   100  				Token:   STRING,
   101  				Literal: "string\"string",
   102  			},
   103  		},
   104  	},
   105  	{
   106  		Name:       "AnsiQuotes",
   107  		Input:      "\"identifier\"",
   108  		AnsiQuotes: true,
   109  		Output: []scanResult{
   110  			{
   111  				Token:   IDENTIFIER,
   112  				Literal: "identifier",
   113  				Quoted:  true,
   114  			},
   115  		},
   116  	},
   117  	{
   118  		Name:  "Integer",
   119  		Input: "1",
   120  		Output: []scanResult{
   121  			{
   122  				Token:   INTEGER,
   123  				Literal: "1",
   124  			},
   125  		},
   126  	},
   127  	{
   128  		Name:  "Float",
   129  		Input: "1.234",
   130  		Output: []scanResult{
   131  			{
   132  				Token:   FLOAT,
   133  				Literal: "1.234",
   134  			},
   135  		},
   136  	},
   137  	{
   138  		Name:  "Flaot with Exponential Notation",
   139  		Input: "1.234e+2",
   140  		Output: []scanResult{
   141  			{
   142  				Token:   FLOAT,
   143  				Literal: "1.234e+2",
   144  			},
   145  		},
   146  	},
   147  	{
   148  		Name:  "Invalid Number",
   149  		Input: "1.234e+",
   150  		Error: "cound not convert \"1.234e+\" to a number",
   151  	},
   152  	{
   153  		Name:  "Ternary",
   154  		Input: "true",
   155  		Output: []scanResult{
   156  			{
   157  				Token:   TERNARY,
   158  				Literal: "true",
   159  			},
   160  		},
   161  	},
   162  	{
   163  		Name:  "Flag",
   164  		Input: "@@flag",
   165  		Output: []scanResult{
   166  			{
   167  				Token:   FLAG,
   168  				Literal: "flag",
   169  			},
   170  		},
   171  	},
   172  	{
   173  		Name:  "Variable",
   174  		Input: "@var",
   175  		Output: []scanResult{
   176  			{
   177  				Token:   VARIABLE,
   178  				Literal: "var",
   179  			},
   180  		},
   181  	},
   182  	{
   183  		Name:  "Environment Variable",
   184  		Input: "@%var",
   185  		Output: []scanResult{
   186  			{
   187  				Token:   ENVIRONMENT_VARIABLE,
   188  				Literal: "var",
   189  			},
   190  		},
   191  	},
   192  	{
   193  		Name:  "Environment Variable Quoted",
   194  		Input: "@%`var`",
   195  		Output: []scanResult{
   196  			{
   197  				Token:   ENVIRONMENT_VARIABLE,
   198  				Literal: "var",
   199  				Quoted:  true,
   200  			},
   201  		},
   202  	},
   203  	{
   204  		Name:  "Runtime Information",
   205  		Input: "@#var",
   206  		Output: []scanResult{
   207  			{
   208  				Token:   RUNTIME_INFORMATION,
   209  				Literal: "var",
   210  			},
   211  		},
   212  	},
   213  	{
   214  		Name:  "Constant",
   215  		Input: "SPACE::NAME",
   216  		Output: []scanResult{
   217  			{
   218  				Token:   CONSTANT,
   219  				Literal: "SPACE::NAME",
   220  			},
   221  		},
   222  	},
   223  	{
   224  		Name:  "Constant Syntax Error",
   225  		Input: "SPACE:: ",
   226  		Error: "invalid constant syntax",
   227  	},
   228  	{
   229  		Name:  "Constant Syntax Error",
   230  		Input: "SPACE::+",
   231  		Error: "invalid constant syntax",
   232  	},
   233  	{
   234  		Name:  "File Path",
   235  		Input: "file:./path",
   236  		Output: []scanResult{
   237  			{
   238  				Token:   URL,
   239  				Literal: "file:./path",
   240  			},
   241  		},
   242  	},
   243  	{
   244  		Name:  "Url",
   245  		Input: "file:///home/my%20dir/path|",
   246  		Output: []scanResult{
   247  			{
   248  				Token:   URL,
   249  				Literal: "file:///home/my%20dir/path",
   250  			},
   251  			{
   252  				Token:   '|',
   253  				Literal: "|",
   254  			},
   255  		},
   256  	},
   257  	{
   258  		Name:  "Table Function",
   259  		Input: "file::('/home/my dir/path')",
   260  		Output: []scanResult{
   261  			{
   262  				Token:   TABLE_FUNCTION,
   263  				Literal: "file",
   264  			},
   265  			{
   266  				Token:   '(',
   267  				Literal: "(",
   268  			},
   269  			{
   270  				Token:   STRING,
   271  				Literal: "/home/my dir/path",
   272  			},
   273  			{
   274  				Token:   ')',
   275  				Literal: ")",
   276  			},
   277  		},
   278  	},
   279  	{
   280  		Name:  "Identifier starting with \"_\"",
   281  		Input: "_foo:",
   282  		Output: []scanResult{
   283  			{
   284  				Token:   IDENTIFIER,
   285  				Literal: "_foo",
   286  			},
   287  			{
   288  				Token:   ':',
   289  				Literal: ":",
   290  			},
   291  		},
   292  	},
   293  	{
   294  		Name:  "EqualSign",
   295  		Input: "=",
   296  		Output: []scanResult{
   297  			{
   298  				Token:   '=',
   299  				Literal: "=",
   300  			},
   301  		},
   302  	},
   303  	{
   304  		Name:  "ComparisonOperator",
   305  		Input: "<=",
   306  		Output: []scanResult{
   307  			{
   308  				Token:   COMPARISON_OP,
   309  				Literal: "<=",
   310  			},
   311  		},
   312  	},
   313  	{
   314  		Name:  "StringOperator",
   315  		Input: "||",
   316  		Output: []scanResult{
   317  			{
   318  				Token:   STRING_OP,
   319  				Literal: "||",
   320  			},
   321  		},
   322  	},
   323  	{
   324  		Name:  "SubstitutionOperator",
   325  		Input: ":=",
   326  		Output: []scanResult{
   327  			{
   328  				Token:   SUBSTITUTION_OP,
   329  				Literal: ":=",
   330  			},
   331  		},
   332  	},
   333  	{
   334  		Name:  "UncategorizedOperator",
   335  		Input: "====",
   336  		Output: []scanResult{
   337  			{
   338  				Token:   Uncategorized,
   339  				Literal: "====",
   340  			},
   341  		},
   342  	},
   343  	{
   344  		Name:  "Keyword",
   345  		Input: "select",
   346  		Output: []scanResult{
   347  			{
   348  				Token:   SELECT,
   349  				Literal: "select",
   350  			},
   351  		},
   352  	},
   353  	{
   354  		Name:  "AggregateFunction",
   355  		Input: "sum",
   356  		Output: []scanResult{
   357  			{
   358  				Token:   AGGREGATE_FUNCTION,
   359  				Literal: "sum",
   360  			},
   361  		},
   362  	},
   363  	{
   364  		Name:  "AnalyticFunction",
   365  		Input: "rank",
   366  		Output: []scanResult{
   367  			{
   368  				Token:   ANALYTIC_FUNCTION,
   369  				Literal: "rank",
   370  			},
   371  		},
   372  	},
   373  	{
   374  		Name:  "FunctionNTH",
   375  		Input: "nth_value",
   376  		Output: []scanResult{
   377  			{
   378  				Token:   FUNCTION_NTH,
   379  				Literal: "nth_value",
   380  			},
   381  		},
   382  	},
   383  	{
   384  		Name:  "FunctionWithINS",
   385  		Input: "lag",
   386  		Output: []scanResult{
   387  			{
   388  				Token:   FUNCTION_WITH_INS,
   389  				Literal: "lag",
   390  			},
   391  		},
   392  	},
   393  	{
   394  		Name:  "PassThrough",
   395  		Input: ",",
   396  		Output: []scanResult{
   397  			{
   398  				Token:   int(','),
   399  				Literal: ",",
   400  			},
   401  		},
   402  	},
   403  	{
   404  		Name:  "Statement",
   405  		Input: "identifier   'string', \n 1-2",
   406  		Output: []scanResult{
   407  			{
   408  				Token:   IDENTIFIER,
   409  				Literal: "identifier",
   410  			},
   411  			{
   412  				Token:   STRING,
   413  				Literal: "string",
   414  			},
   415  			{
   416  				Token:   int(','),
   417  				Literal: ",",
   418  			},
   419  			{
   420  				Token:   INTEGER,
   421  				Literal: "1",
   422  			},
   423  			{
   424  				Token:   int('-'),
   425  				Literal: "-",
   426  			},
   427  			{
   428  				Token:   INTEGER,
   429  				Literal: "2",
   430  			},
   431  		},
   432  	},
   433  	{
   434  		Name:  "Comment",
   435  		Input: "identifier/* 'string', \n 1*/-2",
   436  		Output: []scanResult{
   437  			{
   438  				Token:   IDENTIFIER,
   439  				Literal: "identifier",
   440  			},
   441  			{
   442  				Token:   int('-'),
   443  				Literal: "-",
   444  			},
   445  			{
   446  				Token:   INTEGER,
   447  				Literal: "2",
   448  			},
   449  		},
   450  	},
   451  	{
   452  		Name:  "CommentNotTerminated",
   453  		Input: "identifier/* 'string', \n 1-2",
   454  		Output: []scanResult{
   455  			{
   456  				Token:   IDENTIFIER,
   457  				Literal: "identifier",
   458  			},
   459  		},
   460  	},
   461  	{
   462  		Name:  "External Command",
   463  		Input: "$abc",
   464  		Output: []scanResult{
   465  			{
   466  				Token:   EXTERNAL_COMMAND,
   467  				Literal: "abc",
   468  			},
   469  		},
   470  	},
   471  	{
   472  		Name:  "External Command with LineBreak",
   473  		Input: "$abc\nd\\ef\n ghi\\",
   474  		Output: []scanResult{
   475  			{
   476  				Token:   EXTERNAL_COMMAND,
   477  				Literal: "abc\nd\\ef\n ghi\\",
   478  			},
   479  		},
   480  	},
   481  	{
   482  		Name:  "External Command with Terminator",
   483  		Input: "$abc 'de\\'f;' ${gh\\}i;} @%`var;`;",
   484  		Output: []scanResult{
   485  			{
   486  				Token:   EXTERNAL_COMMAND,
   487  				Literal: "abc 'de\\'f;' ${gh\\}i;} @%`var;`",
   488  			},
   489  			{
   490  				Token:   ';',
   491  				Literal: ";",
   492  			},
   493  		},
   494  	},
   495  	{
   496  		Name:  "LineComment",
   497  		Input: "identifier-- comment 'string', \n 1-2 -- comment \r 2 -- comment",
   498  		Output: []scanResult{
   499  			{
   500  				Token:   IDENTIFIER,
   501  				Literal: "identifier",
   502  			},
   503  			{
   504  				Token:   INTEGER,
   505  				Literal: "1",
   506  			},
   507  			{
   508  				Token:   int('-'),
   509  				Literal: "-",
   510  			},
   511  			{
   512  				Token:   INTEGER,
   513  				Literal: "2",
   514  			},
   515  			{
   516  				Token:   INTEGER,
   517  				Literal: "2",
   518  			},
   519  		},
   520  	},
   521  	{
   522  		Name:  "Line and Char Count",
   523  		Input: "a, \n  /* \n\n */ \r\n c \rd 'abc\ndef' --f\n g",
   524  		Output: []scanResult{
   525  			{
   526  				Token:   IDENTIFIER,
   527  				Literal: "a",
   528  				Line:    1,
   529  				Char:    1,
   530  			},
   531  			{
   532  				Token:   int(','),
   533  				Literal: ",",
   534  				Line:    1,
   535  				Char:    2,
   536  			},
   537  			{
   538  				Token:   IDENTIFIER,
   539  				Literal: "c",
   540  				Line:    5,
   541  				Char:    2,
   542  			},
   543  			{
   544  				Token:   IDENTIFIER,
   545  				Literal: "d",
   546  				Line:    6,
   547  				Char:    1,
   548  			},
   549  			{
   550  				Token:   STRING,
   551  				Literal: "abc\ndef",
   552  				Line:    6,
   553  				Char:    3,
   554  			},
   555  			{
   556  				Token:   IDENTIFIER,
   557  				Literal: "g",
   558  				Line:    8,
   559  				Char:    2,
   560  			},
   561  		},
   562  	},
   563  	{
   564  		Name:  "LiteralNotTerminatedError",
   565  		Input: "\"string",
   566  		Error: "literal not terminated",
   567  	},
   568  	{
   569  		Name:  "LiteralNotTerminatedError 2",
   570  		Input: "\"",
   571  		Error: "literal not terminated",
   572  	},
   573  	{
   574  		Name:  "Invalid Variable Symbol",
   575  		Input: "@@@",
   576  		Error: "invalid variable symbol",
   577  	},
   578  	{
   579  		Name:        "Placeholders",
   580  		Input:       "? :foo",
   581  		ForPrepared: true,
   582  		Output: []scanResult{
   583  			{
   584  				Token:         PLACEHOLDER,
   585  				Literal:       "?",
   586  				HolderOrdinal: 1,
   587  			},
   588  			{
   589  				Token:         PLACEHOLDER,
   590  				Literal:       ":foo",
   591  				HolderOrdinal: 2,
   592  			},
   593  		},
   594  	},
   595  	{
   596  		Name:        "Placeholders",
   597  		Input:       "? :?",
   598  		ForPrepared: true,
   599  		Output: []scanResult{
   600  			{
   601  				Token:         PLACEHOLDER,
   602  				Literal:       "?",
   603  				HolderOrdinal: 1,
   604  			},
   605  			{
   606  				Token:   ':',
   607  				Literal: ":",
   608  			},
   609  			{
   610  				Token:         PLACEHOLDER,
   611  				Literal:       "?",
   612  				HolderOrdinal: 2,
   613  			},
   614  		},
   615  	},
   616  	{
   617  		Name:        "Placeholder Disabled",
   618  		Input:       "?",
   619  		ForPrepared: false,
   620  		Output: []scanResult{
   621  			{
   622  				Token:   '?',
   623  				Literal: "?",
   624  			},
   625  		},
   626  	},
   627  	{
   628  		Name:        "Placeholder Disabled",
   629  		Input:       ":foo",
   630  		ForPrepared: false,
   631  		Output: []scanResult{
   632  			{
   633  				Token:   ':',
   634  				Literal: ":",
   635  			},
   636  			{
   637  				Token:   IDENTIFIER,
   638  				Literal: "foo",
   639  			},
   640  		},
   641  	},
   642  }
   643  
   644  func TestScanner_Scan(t *testing.T) {
   645  	for _, v := range scanTests {
   646  		s := new(Scanner).Init(v.Input, "", v.ForPrepared, v.AnsiQuotes)
   647  
   648  		tokenCount := 0
   649  		for {
   650  			token, err := s.Scan()
   651  			tokenCount++
   652  
   653  			if err != nil {
   654  				if v.Error == "" {
   655  					t.Errorf("%s, token %d: unexpected error %q", v.Name, tokenCount, err.Error())
   656  				} else if v.Error != err.Error() {
   657  					t.Errorf("%s, token %d: error %q, want error %q", v.Name, tokenCount, err.Error(), v.Error)
   658  				}
   659  				break
   660  			}
   661  			if v.Error != "" {
   662  				t.Errorf("%s, token %d: no error, want error %q", v.Name, tokenCount, v.Error)
   663  				break
   664  			}
   665  
   666  			if token.Token == EOF {
   667  				tokenCount--
   668  				if tokenCount != len(v.Output) {
   669  					t.Errorf("%s: scan %d token(s) in a statement, want %d token(s)", v.Name, tokenCount, len(v.Output))
   670  				}
   671  				break
   672  			}
   673  
   674  			if len(v.Output) < tokenCount {
   675  				t.Errorf("%s: scan %d token(s) in a statement, want %d token(s)", v.Name, tokenCount, len(v.Output))
   676  				break
   677  			}
   678  			expect := v.Output[tokenCount-1]
   679  			if token.Token != expect.Token {
   680  				t.Errorf("%s, token %d: token = %s, want %s", v.Name, tokenCount, TokenLiteral(token.Token), TokenLiteral(expect.Token))
   681  			}
   682  			if token.Literal != expect.Literal {
   683  				t.Errorf("%s, token %d: literal = %q, want %q", v.Name, tokenCount, token.Literal, expect.Literal)
   684  			}
   685  			if token.Quoted != expect.Quoted {
   686  				t.Errorf("%s, token %d: quoted = %t, want %t", v.Name, tokenCount, token.Quoted, expect.Quoted)
   687  			}
   688  			if token.HolderOrdinal != expect.HolderOrdinal {
   689  				t.Errorf("%s, token %d: holder ordinal = %d, want %d", v.Name, tokenCount, token.HolderOrdinal, expect.HolderOrdinal)
   690  			}
   691  			if 0 < expect.Line {
   692  				if token.Line != expect.Line {
   693  					t.Errorf("%s, token %d: line %d: want %d", v.Name, tokenCount, token.Line, expect.Line)
   694  				}
   695  				if token.Char != expect.Char {
   696  					t.Errorf("%s, token %d: char %d: want %d", v.Name, tokenCount, token.Char, expect.Char)
   697  				}
   698  			}
   699  		}
   700  	}
   701  }
   702  
   703  var tokenLiteralTests = map[int]string{
   704  	SELECT: "SELECT",
   705  	43:     "+",
   706  }
   707  
   708  func TestTokenLiteral(t *testing.T) {
   709  	for k, v := range tokenLiteralTests {
   710  		n := TokenLiteral(k)
   711  		if n != v {
   712  			t.Errorf("token literal = %q, want %q for %d", n, v, k)
   713  		}
   714  	}
   715  }