github.com/mithrandie/csvq@v1.18.1/lib/parser/scanner_test.go (about) 1 package parser 2 3 import ( 4 "testing" 5 ) 6 7 type scanResult struct { 8 Token int 9 Literal string 10 Quoted bool 11 HolderOrdinal int 12 Line int 13 Char int 14 } 15 16 var scanTests = []struct { 17 Name string 18 Input string 19 ForPrepared bool 20 AnsiQuotes bool 21 Output []scanResult 22 Error string 23 }{ 24 { 25 Name: "Identifier", 26 Input: "identifier", 27 Output: []scanResult{ 28 { 29 Token: IDENTIFIER, 30 Literal: "identifier", 31 }, 32 }, 33 }, 34 { 35 Name: "QuotedIdentifier", 36 Input: "`id\\enti\\`fier```", 37 Output: []scanResult{ 38 { 39 Token: IDENTIFIER, 40 Literal: "id\\enti`fier`", 41 Quoted: true, 42 }, 43 }, 44 }, 45 { 46 Name: "QuotedString", 47 Input: "\"string\\\"\"", 48 Output: []scanResult{ 49 { 50 Token: STRING, 51 Literal: "string\"", 52 }, 53 }, 54 }, 55 { 56 Name: "QuotedString 2", 57 Input: "\"string\\\\\"", 58 Output: []scanResult{ 59 { 60 Token: STRING, 61 Literal: "string\\", 62 }, 63 }, 64 }, 65 { 66 Name: "QuotedString(Single-Quote)", 67 Input: "'strin\\'g string'", 68 Output: []scanResult{ 69 { 70 Token: STRING, 71 Literal: "strin'g string", 72 }, 73 }, 74 }, 75 { 76 Name: "QuotedString Escape Mark", 77 Input: "\"string\\t\"", 78 Output: []scanResult{ 79 { 80 Token: STRING, 81 Literal: "string\t", 82 }, 83 }, 84 }, 85 { 86 Name: "QuotedString Double Escape Mark", 87 Input: "\"string\\\\t\"", 88 Output: []scanResult{ 89 { 90 Token: STRING, 91 Literal: "string\\t", 92 }, 93 }, 94 }, 95 { 96 Name: "QuotedString Double Quotation Mark", 97 Input: "\"string\"\"string\"", 98 Output: []scanResult{ 99 { 100 Token: STRING, 101 Literal: "string\"string", 102 }, 103 }, 104 }, 105 { 106 Name: "AnsiQuotes", 107 Input: "\"identifier\"", 108 AnsiQuotes: true, 109 Output: []scanResult{ 110 { 111 Token: IDENTIFIER, 112 Literal: "identifier", 113 Quoted: true, 114 }, 115 }, 116 }, 117 { 118 Name: "Integer", 119 Input: "1", 120 Output: []scanResult{ 121 { 122 Token: INTEGER, 123 Literal: "1", 124 }, 125 }, 126 }, 127 { 128 Name: "Float", 129 Input: "1.234", 130 Output: []scanResult{ 131 { 132 Token: FLOAT, 133 Literal: "1.234", 134 }, 135 }, 136 }, 137 { 138 Name: "Flaot with Exponential Notation", 139 Input: "1.234e+2", 140 Output: []scanResult{ 141 { 142 Token: FLOAT, 143 Literal: "1.234e+2", 144 }, 145 }, 146 }, 147 { 148 Name: "Invalid Number", 149 Input: "1.234e+", 150 Error: "cound not convert \"1.234e+\" to a number", 151 }, 152 { 153 Name: "Ternary", 154 Input: "true", 155 Output: []scanResult{ 156 { 157 Token: TERNARY, 158 Literal: "true", 159 }, 160 }, 161 }, 162 { 163 Name: "Flag", 164 Input: "@@flag", 165 Output: []scanResult{ 166 { 167 Token: FLAG, 168 Literal: "flag", 169 }, 170 }, 171 }, 172 { 173 Name: "Variable", 174 Input: "@var", 175 Output: []scanResult{ 176 { 177 Token: VARIABLE, 178 Literal: "var", 179 }, 180 }, 181 }, 182 { 183 Name: "Environment Variable", 184 Input: "@%var", 185 Output: []scanResult{ 186 { 187 Token: ENVIRONMENT_VARIABLE, 188 Literal: "var", 189 }, 190 }, 191 }, 192 { 193 Name: "Environment Variable Quoted", 194 Input: "@%`var`", 195 Output: []scanResult{ 196 { 197 Token: ENVIRONMENT_VARIABLE, 198 Literal: "var", 199 Quoted: true, 200 }, 201 }, 202 }, 203 { 204 Name: "Runtime Information", 205 Input: "@#var", 206 Output: []scanResult{ 207 { 208 Token: RUNTIME_INFORMATION, 209 Literal: "var", 210 }, 211 }, 212 }, 213 { 214 Name: "Constant", 215 Input: "SPACE::NAME", 216 Output: []scanResult{ 217 { 218 Token: CONSTANT, 219 Literal: "SPACE::NAME", 220 }, 221 }, 222 }, 223 { 224 Name: "Constant Syntax Error", 225 Input: "SPACE:: ", 226 Error: "invalid constant syntax", 227 }, 228 { 229 Name: "Constant Syntax Error", 230 Input: "SPACE::+", 231 Error: "invalid constant syntax", 232 }, 233 { 234 Name: "File Path", 235 Input: "file:./path", 236 Output: []scanResult{ 237 { 238 Token: URL, 239 Literal: "file:./path", 240 }, 241 }, 242 }, 243 { 244 Name: "Url", 245 Input: "file:///home/my%20dir/path|", 246 Output: []scanResult{ 247 { 248 Token: URL, 249 Literal: "file:///home/my%20dir/path", 250 }, 251 { 252 Token: '|', 253 Literal: "|", 254 }, 255 }, 256 }, 257 { 258 Name: "Table Function", 259 Input: "file::('/home/my dir/path')", 260 Output: []scanResult{ 261 { 262 Token: TABLE_FUNCTION, 263 Literal: "file", 264 }, 265 { 266 Token: '(', 267 Literal: "(", 268 }, 269 { 270 Token: STRING, 271 Literal: "/home/my dir/path", 272 }, 273 { 274 Token: ')', 275 Literal: ")", 276 }, 277 }, 278 }, 279 { 280 Name: "Identifier starting with \"_\"", 281 Input: "_foo:", 282 Output: []scanResult{ 283 { 284 Token: IDENTIFIER, 285 Literal: "_foo", 286 }, 287 { 288 Token: ':', 289 Literal: ":", 290 }, 291 }, 292 }, 293 { 294 Name: "EqualSign", 295 Input: "=", 296 Output: []scanResult{ 297 { 298 Token: '=', 299 Literal: "=", 300 }, 301 }, 302 }, 303 { 304 Name: "ComparisonOperator", 305 Input: "<=", 306 Output: []scanResult{ 307 { 308 Token: COMPARISON_OP, 309 Literal: "<=", 310 }, 311 }, 312 }, 313 { 314 Name: "StringOperator", 315 Input: "||", 316 Output: []scanResult{ 317 { 318 Token: STRING_OP, 319 Literal: "||", 320 }, 321 }, 322 }, 323 { 324 Name: "SubstitutionOperator", 325 Input: ":=", 326 Output: []scanResult{ 327 { 328 Token: SUBSTITUTION_OP, 329 Literal: ":=", 330 }, 331 }, 332 }, 333 { 334 Name: "UncategorizedOperator", 335 Input: "====", 336 Output: []scanResult{ 337 { 338 Token: Uncategorized, 339 Literal: "====", 340 }, 341 }, 342 }, 343 { 344 Name: "Keyword", 345 Input: "select", 346 Output: []scanResult{ 347 { 348 Token: SELECT, 349 Literal: "select", 350 }, 351 }, 352 }, 353 { 354 Name: "AggregateFunction", 355 Input: "sum", 356 Output: []scanResult{ 357 { 358 Token: AGGREGATE_FUNCTION, 359 Literal: "sum", 360 }, 361 }, 362 }, 363 { 364 Name: "AnalyticFunction", 365 Input: "rank", 366 Output: []scanResult{ 367 { 368 Token: ANALYTIC_FUNCTION, 369 Literal: "rank", 370 }, 371 }, 372 }, 373 { 374 Name: "FunctionNTH", 375 Input: "nth_value", 376 Output: []scanResult{ 377 { 378 Token: FUNCTION_NTH, 379 Literal: "nth_value", 380 }, 381 }, 382 }, 383 { 384 Name: "FunctionWithINS", 385 Input: "lag", 386 Output: []scanResult{ 387 { 388 Token: FUNCTION_WITH_INS, 389 Literal: "lag", 390 }, 391 }, 392 }, 393 { 394 Name: "PassThrough", 395 Input: ",", 396 Output: []scanResult{ 397 { 398 Token: int(','), 399 Literal: ",", 400 }, 401 }, 402 }, 403 { 404 Name: "Statement", 405 Input: "identifier 'string', \n 1-2", 406 Output: []scanResult{ 407 { 408 Token: IDENTIFIER, 409 Literal: "identifier", 410 }, 411 { 412 Token: STRING, 413 Literal: "string", 414 }, 415 { 416 Token: int(','), 417 Literal: ",", 418 }, 419 { 420 Token: INTEGER, 421 Literal: "1", 422 }, 423 { 424 Token: int('-'), 425 Literal: "-", 426 }, 427 { 428 Token: INTEGER, 429 Literal: "2", 430 }, 431 }, 432 }, 433 { 434 Name: "Comment", 435 Input: "identifier/* 'string', \n 1*/-2", 436 Output: []scanResult{ 437 { 438 Token: IDENTIFIER, 439 Literal: "identifier", 440 }, 441 { 442 Token: int('-'), 443 Literal: "-", 444 }, 445 { 446 Token: INTEGER, 447 Literal: "2", 448 }, 449 }, 450 }, 451 { 452 Name: "CommentNotTerminated", 453 Input: "identifier/* 'string', \n 1-2", 454 Output: []scanResult{ 455 { 456 Token: IDENTIFIER, 457 Literal: "identifier", 458 }, 459 }, 460 }, 461 { 462 Name: "External Command", 463 Input: "$abc", 464 Output: []scanResult{ 465 { 466 Token: EXTERNAL_COMMAND, 467 Literal: "abc", 468 }, 469 }, 470 }, 471 { 472 Name: "External Command with LineBreak", 473 Input: "$abc\nd\\ef\n ghi\\", 474 Output: []scanResult{ 475 { 476 Token: EXTERNAL_COMMAND, 477 Literal: "abc\nd\\ef\n ghi\\", 478 }, 479 }, 480 }, 481 { 482 Name: "External Command with Terminator", 483 Input: "$abc 'de\\'f;' ${gh\\}i;} @%`var;`;", 484 Output: []scanResult{ 485 { 486 Token: EXTERNAL_COMMAND, 487 Literal: "abc 'de\\'f;' ${gh\\}i;} @%`var;`", 488 }, 489 { 490 Token: ';', 491 Literal: ";", 492 }, 493 }, 494 }, 495 { 496 Name: "LineComment", 497 Input: "identifier-- comment 'string', \n 1-2 -- comment \r 2 -- comment", 498 Output: []scanResult{ 499 { 500 Token: IDENTIFIER, 501 Literal: "identifier", 502 }, 503 { 504 Token: INTEGER, 505 Literal: "1", 506 }, 507 { 508 Token: int('-'), 509 Literal: "-", 510 }, 511 { 512 Token: INTEGER, 513 Literal: "2", 514 }, 515 { 516 Token: INTEGER, 517 Literal: "2", 518 }, 519 }, 520 }, 521 { 522 Name: "Line and Char Count", 523 Input: "a, \n /* \n\n */ \r\n c \rd 'abc\ndef' --f\n g", 524 Output: []scanResult{ 525 { 526 Token: IDENTIFIER, 527 Literal: "a", 528 Line: 1, 529 Char: 1, 530 }, 531 { 532 Token: int(','), 533 Literal: ",", 534 Line: 1, 535 Char: 2, 536 }, 537 { 538 Token: IDENTIFIER, 539 Literal: "c", 540 Line: 5, 541 Char: 2, 542 }, 543 { 544 Token: IDENTIFIER, 545 Literal: "d", 546 Line: 6, 547 Char: 1, 548 }, 549 { 550 Token: STRING, 551 Literal: "abc\ndef", 552 Line: 6, 553 Char: 3, 554 }, 555 { 556 Token: IDENTIFIER, 557 Literal: "g", 558 Line: 8, 559 Char: 2, 560 }, 561 }, 562 }, 563 { 564 Name: "LiteralNotTerminatedError", 565 Input: "\"string", 566 Error: "literal not terminated", 567 }, 568 { 569 Name: "LiteralNotTerminatedError 2", 570 Input: "\"", 571 Error: "literal not terminated", 572 }, 573 { 574 Name: "Invalid Variable Symbol", 575 Input: "@@@", 576 Error: "invalid variable symbol", 577 }, 578 { 579 Name: "Placeholders", 580 Input: "? :foo", 581 ForPrepared: true, 582 Output: []scanResult{ 583 { 584 Token: PLACEHOLDER, 585 Literal: "?", 586 HolderOrdinal: 1, 587 }, 588 { 589 Token: PLACEHOLDER, 590 Literal: ":foo", 591 HolderOrdinal: 2, 592 }, 593 }, 594 }, 595 { 596 Name: "Placeholders", 597 Input: "? :?", 598 ForPrepared: true, 599 Output: []scanResult{ 600 { 601 Token: PLACEHOLDER, 602 Literal: "?", 603 HolderOrdinal: 1, 604 }, 605 { 606 Token: ':', 607 Literal: ":", 608 }, 609 { 610 Token: PLACEHOLDER, 611 Literal: "?", 612 HolderOrdinal: 2, 613 }, 614 }, 615 }, 616 { 617 Name: "Placeholder Disabled", 618 Input: "?", 619 ForPrepared: false, 620 Output: []scanResult{ 621 { 622 Token: '?', 623 Literal: "?", 624 }, 625 }, 626 }, 627 { 628 Name: "Placeholder Disabled", 629 Input: ":foo", 630 ForPrepared: false, 631 Output: []scanResult{ 632 { 633 Token: ':', 634 Literal: ":", 635 }, 636 { 637 Token: IDENTIFIER, 638 Literal: "foo", 639 }, 640 }, 641 }, 642 } 643 644 func TestScanner_Scan(t *testing.T) { 645 for _, v := range scanTests { 646 s := new(Scanner).Init(v.Input, "", v.ForPrepared, v.AnsiQuotes) 647 648 tokenCount := 0 649 for { 650 token, err := s.Scan() 651 tokenCount++ 652 653 if err != nil { 654 if v.Error == "" { 655 t.Errorf("%s, token %d: unexpected error %q", v.Name, tokenCount, err.Error()) 656 } else if v.Error != err.Error() { 657 t.Errorf("%s, token %d: error %q, want error %q", v.Name, tokenCount, err.Error(), v.Error) 658 } 659 break 660 } 661 if v.Error != "" { 662 t.Errorf("%s, token %d: no error, want error %q", v.Name, tokenCount, v.Error) 663 break 664 } 665 666 if token.Token == EOF { 667 tokenCount-- 668 if tokenCount != len(v.Output) { 669 t.Errorf("%s: scan %d token(s) in a statement, want %d token(s)", v.Name, tokenCount, len(v.Output)) 670 } 671 break 672 } 673 674 if len(v.Output) < tokenCount { 675 t.Errorf("%s: scan %d token(s) in a statement, want %d token(s)", v.Name, tokenCount, len(v.Output)) 676 break 677 } 678 expect := v.Output[tokenCount-1] 679 if token.Token != expect.Token { 680 t.Errorf("%s, token %d: token = %s, want %s", v.Name, tokenCount, TokenLiteral(token.Token), TokenLiteral(expect.Token)) 681 } 682 if token.Literal != expect.Literal { 683 t.Errorf("%s, token %d: literal = %q, want %q", v.Name, tokenCount, token.Literal, expect.Literal) 684 } 685 if token.Quoted != expect.Quoted { 686 t.Errorf("%s, token %d: quoted = %t, want %t", v.Name, tokenCount, token.Quoted, expect.Quoted) 687 } 688 if token.HolderOrdinal != expect.HolderOrdinal { 689 t.Errorf("%s, token %d: holder ordinal = %d, want %d", v.Name, tokenCount, token.HolderOrdinal, expect.HolderOrdinal) 690 } 691 if 0 < expect.Line { 692 if token.Line != expect.Line { 693 t.Errorf("%s, token %d: line %d: want %d", v.Name, tokenCount, token.Line, expect.Line) 694 } 695 if token.Char != expect.Char { 696 t.Errorf("%s, token %d: char %d: want %d", v.Name, tokenCount, token.Char, expect.Char) 697 } 698 } 699 } 700 } 701 } 702 703 var tokenLiteralTests = map[int]string{ 704 SELECT: "SELECT", 705 43: "+", 706 } 707 708 func TestTokenLiteral(t *testing.T) { 709 for k, v := range tokenLiteralTests { 710 n := TokenLiteral(k) 711 if n != v { 712 t.Errorf("token literal = %q, want %q for %d", n, v, k) 713 } 714 } 715 }