github.com/TeaOSLab/EdgeNode@v1.3.8/internal/waf/injectionutils/libinjection/src/libinjection_sqli.c (about) 1 /** 2 * Copyright 2012,2016 Nick Galbreath 3 * nickg@client9.com 4 * BSD License -- see COPYING.txt for details 5 * 6 * https://libinjection.client9.com/ 7 * 8 */ 9 10 #include <string.h> 11 #include <stdlib.h> 12 #include <stdio.h> 13 #include <ctype.h> 14 #include <assert.h> 15 #include <stddef.h> 16 17 #include "libinjection.h" 18 #include "libinjection_sqli.h" 19 #include "libinjection_sqli_data.h" 20 21 #ifdef __clang_analyzer__ 22 // make clang analyzer happy by defining a dummy version 23 #define LIBINJECTION_VERSION "undefined" 24 #endif 25 26 #define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val) 27 #define LIBINJECTION_SQLI_MAX_TOKENS 5 28 29 #ifndef TRUE 30 #define TRUE 1 31 #endif 32 #ifndef FALSE 33 #define FALSE 0 34 #endif 35 36 #define CHAR_NULL '\0' 37 #define CHAR_SINGLE '\'' 38 #define CHAR_DOUBLE '"' 39 #define CHAR_TICK '`' 40 41 /* faster than calling out to libc isdigit */ 42 #define ISDIGIT(a) ((unsigned)((a) - '0') <= 9) 43 44 #if 0 45 #define FOLD_DEBUG printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left); 46 #else 47 #define FOLD_DEBUG 48 #endif 49 50 /* 51 * not making public just yet 52 */ 53 typedef enum { 54 TYPE_NONE = 0 55 , TYPE_KEYWORD = (int)'k' 56 , TYPE_UNION = (int)'U' 57 , TYPE_GROUP = (int)'B' 58 , TYPE_EXPRESSION = (int)'E' 59 , TYPE_SQLTYPE = (int)'t' 60 , TYPE_FUNCTION = (int)'f' 61 , TYPE_BAREWORD = (int)'n' 62 , TYPE_NUMBER = (int)'1' 63 , TYPE_VARIABLE = (int)'v' 64 , TYPE_STRING = (int)'s' 65 , TYPE_OPERATOR = (int)'o' 66 , TYPE_LOGIC_OPERATOR = (int)'&' 67 , TYPE_COMMENT = (int)'c' 68 , TYPE_COLLATE = (int)'A' 69 , TYPE_LEFTPARENS = (int)'(' 70 , TYPE_RIGHTPARENS = (int)')' /* not used? */ 71 , TYPE_LEFTBRACE = (int)'{' 72 , TYPE_RIGHTBRACE = (int)'}' 73 , TYPE_DOT = (int)'.' 
74 , TYPE_COMMA = (int)',' 75 , TYPE_COLON = (int)':' 76 , TYPE_SEMICOLON = (int)';' 77 , TYPE_TSQL = (int)'T' /* TSQL start */ 78 , TYPE_UNKNOWN = (int)'?' 79 , TYPE_EVIL = (int)'X' /* unparsable, abort */ 80 , TYPE_FINGERPRINT = (int)'F' /* not really a token */ 81 , TYPE_BACKSLASH = (int)'\\' 82 } sqli_token_types; 83 84 /** 85 * Initializes parsing state 86 * 87 */ 88 static char flag2delim(int flag) 89 { 90 if (flag & FLAG_QUOTE_SINGLE) { 91 return CHAR_SINGLE; 92 } else if (flag & FLAG_QUOTE_DOUBLE) { 93 return CHAR_DOUBLE; 94 } else { 95 return CHAR_NULL; 96 } 97 } 98 99 /* memchr2 finds a string of 2 characters inside another string 100 * This a specialized version of "memmem" or "memchr". 101 * 'memmem' doesn't exist on all platforms 102 * 103 * Porting notes: this is just a special version of 104 * astring.find("AB") 105 * 106 */ 107 static const char * 108 memchr2(const char *haystack, size_t haystack_len, char c0, char c1) 109 { 110 const char *cur = haystack; 111 const char *last = haystack + haystack_len - 1; 112 113 if (haystack_len < 2) { 114 return NULL; 115 } 116 117 while (cur < last) { 118 /* safe since cur < len - 1 always */ 119 if (cur[0] == c0 && cur[1] == c1) { 120 return cur; 121 } 122 cur += 1; 123 } 124 125 return NULL; 126 } 127 128 /** 129 * memmem might not exist on some systems 130 */ 131 static const char * 132 my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen) 133 { 134 const char* cur; 135 const char* last; 136 assert(haystack); 137 assert(needle); 138 assert(nlen > 1); 139 last = haystack + hlen - nlen; 140 for (cur = haystack; cur <= last; ++cur) { 141 if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) { 142 return cur; 143 } 144 } 145 return NULL; 146 } 147 148 /** Find largest string containing certain characters. 149 * 150 * C Standard library 'strspn' only works for 'c-strings' (null terminated) 151 * This works on arbitrary length. 
/* strlenspn: length of the leading run of s[0..len) that consists only
 * of characters listed in 'accept'.
 *
 * Performance notes:
 *   not critical
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    size_t i;
    for (i = 0; i < len; ++i) {
        /* strchr() also matches the terminator of 'accept', so a NUL
         * byte in the input has to be rejected explicitly; otherwise
         * it would be counted as an accepted character.
         */
        if (s[i] == '\0' || strchr(accept, s[i]) == NULL) {
            return i;
        }
    }
    return len;
}

/* strlencspn: length of the leading run of s[0..len) that contains
 * none of the characters listed in 'accept'.
 *
 * A NUL byte in the input always ends the run, because strchr()
 * matches the terminator of 'accept'.
 */
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
    size_t i;
    for (i = 0; i < len; ++i) {
        /* likely we can do better by inlining this function
         * but this works for now
         */
        if (strchr(accept, s[i]) != NULL) {
            return i;
        }
    }
    return len;
}

/* Returns non-zero when ch is treated as SQL white space. */
static int char_is_white(char ch) {
    /* ' '  space is 0x20
       '\t  0x09 \011 horizontal tab
       '\n' 0x0a \012 new line
       '\v' 0x0b \013 vertical tab
       '\f' 0x0c \014 new page
       '\r' 0x0d \015 carriage return
            0x00 \000 null (oracle)
            0xa0 \240 is Latin-1
    */
    return strchr(" \t\n\v\f\r\240\000", ch) != NULL;
}

/* DANGER DANGER
 * This is -very specialized function-
 *
 * this compares a ALL_UPPER CASE C STRING
 * with a *arbitrary memory* + length
 *
 * Returns 0 when the first n bytes of b, with a-z folded to upper
 * case, equal the whole of a.  Any other result is non-zero.
 *
 * Sane people would just make a copy, up-case
 * and use a hash table.
 *
 * Required since libc version uses the current locale
 * and is much slower.
 */
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
    char cb;

    for (; n > 0; a++, b++, n--) {
        cb = *b;
        if (cb >= 'a' && cb <= 'z') {
            cb -= 0x20;
        }
        if (*a != cb) {
            return *a - cb;
        } else if (*a == '\0') {
            /* 'a' ended although b still has bytes left to compare */
            return -1;
        }
    }

    /* all n bytes matched: equal only if 'a' ends here as well */
    return (*a == 0) ? 0 : 1;
}

/**
 * Case sensitive string compare.
 */
235 * Here only to make code more readable 236 */ 237 static int streq(const char *a, const char *b) 238 { 239 return strcmp(a, b) == 0; 240 } 241 242 /** 243 * 244 * 245 * 246 * Porting Notes: 247 * given a mapping/hash of string to char 248 * this is just 249 * typecode = mapping[key.upper()] 250 */ 251 252 static char bsearch_keyword_type(const char *key, size_t len, 253 const keyword_t * keywords, size_t numb) 254 { 255 size_t pos; 256 size_t left = 0; 257 size_t right = numb - 1; 258 259 while (left < right) { 260 pos = (left + right) >> 1; 261 262 /* arg0 = upper case only, arg1 = mixed case */ 263 if (cstrcasecmp(keywords[pos].word, key, len) < 0) { 264 left = pos + 1; 265 } else { 266 right = pos; 267 } 268 } 269 if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) { 270 return keywords[left].type; 271 } else { 272 return CHAR_NULL; 273 } 274 } 275 276 static char is_keyword(const char* key, size_t len) 277 { 278 return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz); 279 } 280 281 /* st_token methods 282 * 283 * The following functions manipulates the stoken_t type 284 * 285 * 286 */ 287 288 static void st_clear(stoken_t * st) 289 { 290 memset(st, 0, sizeof(stoken_t)); 291 } 292 293 static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len, 294 const char value) 295 { 296 /* done to eliminate unused warning */ 297 (void)len; 298 st->type = (char) stype; 299 st->pos = pos; 300 st->len = 1; 301 st->val[0] = value; 302 st->val[1] = CHAR_NULL; 303 } 304 305 static void st_assign(stoken_t * st, const char stype, 306 size_t pos, size_t len, const char* value) 307 { 308 const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE; 309 size_t last = len < MSIZE ? 
len : (MSIZE - 1); 310 st->type = (char) stype; 311 st->pos = pos; 312 st->len = last; 313 memcpy(st->val, value, last); 314 st->val[last] = CHAR_NULL; 315 } 316 317 static void st_copy(stoken_t * dest, const stoken_t * src) 318 { 319 memcpy(dest, src, sizeof(stoken_t)); 320 } 321 322 static int st_is_arithmetic_op(const stoken_t* st) 323 { 324 const char ch = st->val[0]; 325 return (st->type == TYPE_OPERATOR && st->len == 1 && 326 (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%')); 327 } 328 329 static int st_is_unary_op(const stoken_t * st) 330 { 331 const char* str = st->val; 332 const size_t len = st->len; 333 334 if (st->type != TYPE_OPERATOR) { 335 return FALSE; 336 } 337 338 switch (len) { 339 case 1: 340 return *str == '+' || *str == '-' || *str == '!' || *str == '~'; 341 case 2: 342 return str[0] == '!' && str[1] == '!'; 343 case 3: 344 return cstrcasecmp("NOT", str, 3) == 0; 345 default: 346 return FALSE; 347 } 348 } 349 350 /* Parsers 351 * 352 * 353 */ 354 355 static size_t parse_white(struct libinjection_sqli_state * sf) 356 { 357 return sf->pos + 1; 358 } 359 360 static size_t parse_operator1(struct libinjection_sqli_state * sf) 361 { 362 const char *cs = sf->s; 363 size_t pos = sf->pos; 364 365 st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]); 366 return pos + 1; 367 } 368 369 static size_t parse_other(struct libinjection_sqli_state * sf) 370 { 371 const char *cs = sf->s; 372 size_t pos = sf->pos; 373 374 st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]); 375 return pos + 1; 376 } 377 378 static size_t parse_char(struct libinjection_sqli_state * sf) 379 { 380 const char *cs = sf->s; 381 size_t pos = sf->pos; 382 383 st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]); 384 return pos + 1; 385 } 386 387 static size_t parse_eol_comment(struct libinjection_sqli_state * sf) 388 { 389 const char *cs = sf->s; 390 const size_t slen = sf->slen; 391 size_t pos = sf->pos; 392 393 const char *endpos = 394 (const char *) 
memchr((const void *) (cs + pos), '\n', slen - pos); 395 if (endpos == NULL) { 396 st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos); 397 return slen; 398 } else { 399 st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos); 400 return (size_t)((endpos - cs) + 1); 401 } 402 } 403 404 /** In ANSI mode, hash is an operator 405 * In MYSQL mode, it's a EOL comment like '--' 406 */ 407 static size_t parse_hash(struct libinjection_sqli_state * sf) 408 { 409 sf->stats_comment_hash += 1; 410 if (sf->flags & FLAG_SQL_MYSQL) { 411 sf->stats_comment_hash += 1; 412 return parse_eol_comment(sf); 413 } else { 414 st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#'); 415 return sf->pos + 1; 416 } 417 } 418 419 static size_t parse_dash(struct libinjection_sqli_state * sf) 420 { 421 const char *cs = sf->s; 422 const size_t slen = sf->slen; 423 size_t pos = sf->pos; 424 425 /* 426 * five cases 427 * 1) --[white] this is always a SQL comment 428 * 2) --[EOF] this is a comment 429 * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators 430 * 4) --[notwhite] everyone else thinks this is a comment 431 * 5) -[not dash] '-' is a unary operator 432 */ 433 434 if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) { 435 return parse_eol_comment(sf); 436 } else if (pos +2 == slen && cs[pos + 1] == '-') { 437 return parse_eol_comment(sf); 438 } else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) { 439 /* --[not-white] not-white case: 440 * 441 */ 442 sf->stats_comment_ddx += 1; 443 return parse_eol_comment(sf); 444 } else { 445 st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-'); 446 return pos + 1; 447 } 448 } 449 450 451 /** This detects MySQL comments, comments that 452 * start with /x! 
We just ban these now but 453 * previously we attempted to parse the inside 454 * 455 * For reference: 456 * the form of /x![anything]x/ or /x!12345[anything] x/ 457 * 458 * Mysql 3 (maybe 4), allowed this: 459 * /x!0selectx/ 1; 460 * where 0 could be any number. 461 * 462 * The last version of MySQL 3 was in 2003. 463 464 * It is unclear if the MySQL 3 syntax was allowed 465 * in MySQL 4. The last version of MySQL 4 was in 2008 466 * 467 */ 468 static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos) 469 { 470 /* so far... 471 * cs[pos] == '/' && cs[pos+1] == '*' 472 */ 473 474 if (pos + 2 >= len) { 475 /* not a mysql comment */ 476 return 0; 477 } 478 479 if (cs[pos + 2] != '!') { 480 /* not a mysql comment */ 481 return 0; 482 } 483 484 /* 485 * this is a mysql comment 486 * got "/x!" 487 */ 488 return 1; 489 } 490 491 static size_t parse_slash(struct libinjection_sqli_state * sf) 492 { 493 const char* ptr; 494 size_t clen; 495 const char *cs = sf->s; 496 const size_t slen = sf->slen; 497 size_t pos = sf->pos; 498 const char* cur = cs + pos; 499 char ctype = TYPE_COMMENT; 500 size_t pos1 = pos + 1; 501 if (pos1 == slen || cs[pos1] != '*') { 502 return parse_operator1(sf); 503 } 504 505 /* 506 * skip over initial '/x' 507 */ 508 ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/'); 509 if (ptr == NULL) { 510 /* till end of line */ 511 clen = slen - pos; 512 } else { 513 clen = (size_t)(ptr + 2 - cur); 514 } 515 516 /* 517 * postgresql allows nested comments which makes 518 * this is incompatible with parsing so 519 * if we find a '/x' inside the coment, then 520 * make a new token. 521 * 522 * Also, Mysql's "conditional" comments for version 523 * are an automatic black ban! 
524 */ 525 526 if ( 527 ptr != NULL && 528 memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL 529 ) { 530 ctype = TYPE_EVIL; 531 } else if (is_mysql_comment(cs, slen, pos)) { 532 ctype = TYPE_EVIL; 533 } 534 535 st_assign(sf->current, ctype, pos, clen, cs + pos); 536 return pos + clen; 537 } 538 539 540 static size_t parse_backslash(struct libinjection_sqli_state * sf) 541 { 542 const char *cs = sf->s; 543 const size_t slen = sf->slen; 544 size_t pos = sf->pos; 545 546 /* 547 * Weird MySQL alias for NULL, "\N" (capital N only) 548 */ 549 if (pos + 1 < slen && cs[pos +1] == 'N') { 550 st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos); 551 return pos + 2; 552 } else { 553 st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]); 554 return pos + 1; 555 } 556 } 557 558 static size_t parse_operator2(struct libinjection_sqli_state * sf) 559 { 560 char ch; 561 const char *cs = sf->s; 562 const size_t slen = sf->slen; 563 size_t pos = sf->pos; 564 565 if (pos + 1 >= slen) { 566 return parse_operator1(sf); 567 } 568 569 if (pos + 2 < slen && 570 cs[pos] == '<' && 571 cs[pos + 1] == '=' && 572 cs[pos + 2] == '>') { 573 /* 574 * special 3-char operator 575 */ 576 st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos); 577 return pos + 3; 578 } 579 580 ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2); 581 if (ch != CHAR_NULL) { 582 st_assign(sf->current, ch, pos, 2, cs+pos); 583 return pos + 2; 584 } 585 586 /* 587 * not an operator.. what to do with the two 588 * characters we got? 589 */ 590 591 if (cs[pos] == ':') { 592 /* ':' is not an operator */ 593 st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos); 594 return pos + 1; 595 } else { 596 /* 597 * must be a single char operator 598 */ 599 return parse_operator1(sf); 600 } 601 } 602 603 /* 604 * Ok! " \" " one backslash = escaped! 605 * " \\" " two backslash = not escaped! 606 * "\\\" " three backslash = escaped! 
/* is_backslash_escaped: a character is escaped when it is preceded by
 * an odd number of consecutive backslashes.
 */
#ifndef __clang_analyzer__
/* Count the backslashes that end at 'end', scanning backwards but never
 * past 'start'.  Returns 1 when that count is odd, 0 otherwise
 * (including when end < start, i.e. nothing precedes the character).
 *
 * This function deals with non-null terminated char arrays.
 *
 * Code not to be analyzed by clang: the original form of this loop
 * triggered a false positive "Out of bound memory access"
 * [alpha.security.ArrayBoundV2] warning.
 */
static int is_backslash_escaped(const char* end, const char* start)
{
    const char* ptr = end;
    size_t run = 0;

    while (ptr >= start && *ptr == '\\') {
        run += 1;
        if (ptr == start) {
            /* stop here instead of stepping in front of 'start' */
            break;
        }
        ptr -= 1;
    }

    /* if number of backslashes is odd, it is escaped */
    return (int)(run & 1);
}
#endif

/* Returns 1 when the delimiter at 'cur' is directly followed, before
 * 'end', by the same character (SQL style doubling), else 0.
 */
static size_t is_double_delim_escaped(const char* cur, const char* end)
{
    const char* next = cur + 1;

    if (next >= end) {
        return 0;
    }
    return *next == *cur;
}

/* Look forward for doubling of delimiter
 *
 * case 'foo''bar' --> foo''bar
 */
escaped) 641 * since it's the wrong char or EOL 642 * 643 */ 644 static size_t parse_string_core(const char *cs, const size_t len, size_t pos, 645 stoken_t * st, char delim, size_t offset) 646 { 647 /* 648 * offset is to skip the perhaps first quote char 649 */ 650 const char *qpos = 651 (const char *) memchr((const void *) (cs + pos + offset), delim, 652 len - pos - offset); 653 654 /* 655 * then keep string open/close info 656 */ 657 if (offset > 0) { 658 /* 659 * this is real quote 660 */ 661 st->str_open = delim; 662 } else { 663 /* 664 * this was a simulated quote 665 */ 666 st->str_open = CHAR_NULL; 667 } 668 669 while (TRUE) { 670 if (qpos == NULL) { 671 /* 672 * string ended with no trailing quote 673 * assign what we have 674 */ 675 st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset); 676 st->str_close = CHAR_NULL; 677 return len; 678 } else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) { 679 /* keep going, move ahead one character */ 680 qpos = 681 (const char *) memchr((const void *) (qpos + 1), delim, 682 (size_t)((cs + len) - (qpos + 1))); 683 continue; 684 } else if (is_double_delim_escaped(qpos, cs + len)) { 685 /* keep going, move ahead two characters */ 686 qpos = 687 (const char *) memchr((const void *) (qpos + 2), delim, 688 (size_t)((cs + len) - (qpos + 2))); 689 continue; 690 } else { 691 /* hey it's a normal string */ 692 st_assign(st, TYPE_STRING, pos + offset, 693 (size_t)(qpos - (cs + pos + offset)), cs + pos + offset); 694 st->str_close = delim; 695 return (size_t)(qpos - cs + 1); 696 } 697 } 698 } 699 700 /** 701 * Used when first char is a ' or " 702 */ 703 static size_t parse_string(struct libinjection_sqli_state * sf) 704 { 705 const char *cs = sf->s; 706 const size_t slen = sf->slen; 707 size_t pos = sf->pos; 708 709 /* 710 * assert cs[pos] == single or double quote 711 */ 712 return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1); 713 } 714 715 /** 716 * Used when first char is: 717 * N 
or n: mysql "National Character set" 718 * E : psql "Escaped String" 719 */ 720 static size_t parse_estring(struct libinjection_sqli_state * sf) 721 { 722 const char *cs = sf->s; 723 const size_t slen = sf->slen; 724 size_t pos = sf->pos; 725 726 if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) { 727 return parse_word(sf); 728 } 729 return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2); 730 } 731 732 static size_t parse_ustring(struct libinjection_sqli_state * sf) 733 { 734 const char *cs = sf->s; 735 size_t slen = sf->slen; 736 size_t pos = sf->pos; 737 738 if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') { 739 sf->pos += 2; 740 pos = parse_string(sf); 741 sf->current->str_open = 'u'; 742 if (sf->current->str_close == '\'') { 743 sf->current->str_close = 'u'; 744 } 745 return pos; 746 } else { 747 return parse_word(sf); 748 } 749 } 750 751 static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset) 752 { 753 char ch; 754 const char *strend; 755 const char *cs = sf->s; 756 size_t slen = sf->slen; 757 size_t pos = sf->pos + offset; 758 759 /* if we are already at end of string.. 
760 if current char is not q or Q 761 if we don't have 2 more chars 762 if char2 != a single quote 763 then, just treat as word 764 */ 765 if (pos >= slen || 766 (cs[pos] != 'q' && cs[pos] != 'Q') || 767 pos + 2 >= slen || 768 cs[pos + 1] != '\'') { 769 return parse_word(sf); 770 } 771 772 ch = cs[pos + 2]; 773 774 /* the ch > 127 is un-needed since 775 * we assume char is signed 776 */ 777 if (ch < 33 /* || ch > 127 */) { 778 return parse_word(sf); 779 } 780 switch (ch) { 781 case '(' : ch = ')'; break; 782 case '[' : ch = ']'; break; 783 case '{' : ch = '}'; break; 784 case '<' : ch = '>'; break; 785 } 786 787 strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\''); 788 if (strend == NULL) { 789 st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3); 790 sf->current->str_open = 'q'; 791 sf->current->str_close = CHAR_NULL; 792 return slen; 793 } else { 794 st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3); 795 sf->current->str_open = 'q'; 796 sf->current->str_close = 'q'; 797 return (size_t)(strend - cs + 2); 798 } 799 } 800 801 /* 802 * Oracle's q string 803 */ 804 static size_t parse_qstring(struct libinjection_sqli_state * sf) 805 { 806 return parse_qstring_core(sf, 0); 807 } 808 809 /* 810 * mysql's N'STRING' or 811 * ... 
Oracle's nq string 812 */ 813 static size_t parse_nqstring(struct libinjection_sqli_state * sf) 814 { 815 size_t slen = sf->slen; 816 size_t pos = sf->pos; 817 if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) { 818 return parse_estring(sf); 819 } 820 return parse_qstring_core(sf, 1); 821 } 822 823 /* 824 * binary literal string 825 * re: [bB]'[01]*' 826 */ 827 static size_t parse_bstring(struct libinjection_sqli_state *sf) 828 { 829 size_t wlen; 830 const char *cs = sf->s; 831 size_t pos = sf->pos; 832 size_t slen = sf->slen; 833 834 /* need at least 2 more characters 835 * if next char isn't a single quote, then 836 * continue as normal word 837 */ 838 if (pos + 2 >= slen || cs[pos+1] != '\'') { 839 return parse_word(sf); 840 } 841 842 wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01"); 843 if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') { 844 return parse_word(sf); 845 } 846 st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos); 847 return pos + 2 + wlen + 1; 848 } 849 850 /* 851 * hex literal string 852 * re: [xX]'[0123456789abcdefABCDEF]*' 853 * mysql has requirement of having EVEN number of chars, 854 * but pgsql does not 855 */ 856 static size_t parse_xstring(struct libinjection_sqli_state *sf) 857 { 858 size_t wlen; 859 const char *cs = sf->s; 860 size_t pos = sf->pos; 861 size_t slen = sf->slen; 862 863 /* need at least 2 more characters 864 * if next char isn't a single quote, then 865 * continue as normal word 866 */ 867 if (pos + 2 >= slen || cs[pos+1] != '\'') { 868 return parse_word(sf); 869 } 870 871 wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef"); 872 if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') { 873 return parse_word(sf); 874 } 875 st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos); 876 return pos + 2 + wlen + 1; 877 } 878 879 /** 880 * This handles MS SQLSERVER bracket words 881 * 
http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name 882 * 883 */ 884 static size_t parse_bword(struct libinjection_sqli_state * sf) 885 { 886 const char *cs = sf->s; 887 size_t pos = sf->pos; 888 const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos); 889 if (endptr == NULL) { 890 st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos); 891 return sf->slen; 892 } else { 893 st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos); 894 return (size_t)((endptr - cs) + 1); 895 } 896 } 897 898 static size_t parse_word(struct libinjection_sqli_state * sf) 899 { 900 char ch; 901 char delim; 902 size_t i; 903 const char *cs = sf->s; 904 size_t pos = sf->pos; 905 size_t wlen = strlencspn(cs + pos, sf->slen - pos, 906 " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000"); 907 908 st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos); 909 910 /* now we need to look inside what we good for "." and "`" 911 * and see if what is before is a keyword or not 912 */ 913 for (i =0; i < sf->current->len; ++i) { 914 delim = sf->current->val[i]; 915 if (delim == '.' || delim == '`') { 916 ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i); 917 if (ch != TYPE_NONE && ch != TYPE_BAREWORD) { 918 /* needed for swig */ 919 st_clear(sf->current); 920 /* 921 * we got something like "SELECT.1" 922 * or SELECT`column` 923 */ 924 st_assign(sf->current, ch, pos, i, cs + pos); 925 return pos + i; 926 } 927 } 928 } 929 930 /* 931 * do normal lookup with word including '.' 932 */ 933 if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) { 934 935 ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen); 936 if (ch == CHAR_NULL) { 937 ch = TYPE_BAREWORD; 938 } 939 sf->current->type = ch; 940 } 941 return pos + wlen; 942 } 943 944 /* MySQL backticks are a cross between string and 945 * and a bare word. 
946 * 947 */ 948 static size_t parse_tick(struct libinjection_sqli_state* sf) 949 { 950 size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1); 951 952 /* we could check to see if start and end of 953 * of string are both "`", i.e. make sure we have 954 * matching set. `foo` vs. `foo 955 * but I don't think it matters much 956 */ 957 958 /* check value of string to see if it's a keyword, 959 * function, operator, etc 960 */ 961 char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len); 962 if (ch == TYPE_FUNCTION) { 963 /* if it's a function, then convert token */ 964 sf->current->type = TYPE_FUNCTION; 965 } else { 966 /* otherwise it's a 'n' type -- mysql treats 967 * everything as a bare word 968 */ 969 sf->current->type = TYPE_BAREWORD; 970 } 971 return pos; 972 } 973 974 static size_t parse_var(struct libinjection_sqli_state * sf) 975 { 976 size_t xlen; 977 const char *cs = sf->s; 978 const size_t slen = sf->slen; 979 size_t pos = sf->pos + 1; 980 981 /* 982 * var_count is only used to reconstruct 983 * the input. 
It counts the number of '@' 984 * seen 0 in the case of NULL, 1 or 2 985 */ 986 987 /* 988 * move past optional other '@' 989 */ 990 if (pos < slen && cs[pos] == '@') { 991 pos += 1; 992 sf->current->count = 2; 993 } else { 994 sf->current->count = 1; 995 } 996 997 /* 998 * MySQL allows @@`version` 999 */ 1000 if (pos < slen) { 1001 if (cs[pos] == '`') { 1002 sf->pos = pos; 1003 pos = parse_tick(sf); 1004 sf->current->type = TYPE_VARIABLE; 1005 return pos; 1006 } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) { 1007 sf->pos = pos; 1008 pos = parse_string(sf); 1009 sf->current->type = TYPE_VARIABLE; 1010 return pos; 1011 } 1012 } 1013 1014 1015 xlen = strlencspn(cs + pos, slen - pos, 1016 " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\""); 1017 if (xlen == 0) { 1018 st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos); 1019 return pos; 1020 } else { 1021 st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos); 1022 return pos + xlen; 1023 } 1024 } 1025 1026 static size_t parse_money(struct libinjection_sqli_state *sf) 1027 { 1028 size_t xlen; 1029 const char* strend; 1030 const char *cs = sf->s; 1031 const size_t slen = sf->slen; 1032 size_t pos = sf->pos; 1033 1034 if (pos + 1 == slen) { 1035 /* end of line */ 1036 st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$'); 1037 return slen; 1038 } 1039 1040 /* 1041 * $1,000.00 or $1.000,00 ok! 1042 * This also parses $....,,,111 but that's ok 1043 */ 1044 1045 xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,"); 1046 if (xlen == 0) { 1047 if (cs[pos + 1] == '$') { 1048 /* we have $$ .. 
find ending $$ and make string */ 1049 strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$'); 1050 if (strend == NULL) { 1051 /* fell off edge */ 1052 st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2); 1053 sf->current->str_open = '$'; 1054 sf->current->str_close = CHAR_NULL; 1055 return slen; 1056 } else { 1057 st_assign(sf->current, TYPE_STRING, pos + 2, 1058 (size_t)(strend - (cs + pos + 2)), cs + pos + 2); 1059 sf->current->str_open = '$'; 1060 sf->current->str_close = '$'; 1061 return (size_t)(strend - cs + 2); 1062 } 1063 } else { 1064 /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */ 1065 xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); 1066 if (xlen == 0) { 1067 /* hmm it's "$" _something_ .. just add $ and keep going*/ 1068 st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$'); 1069 return pos + 1; 1070 } 1071 /* we have $foobar????? */ 1072 /* is it $foobar$ */ 1073 if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') { 1074 /* not $foobar$, or fell off edge */ 1075 st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$'); 1076 return pos + 1; 1077 } 1078 1079 /* we have $foobar$ ... find it again */ 1080 strend = my_memmem(cs+pos+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2); 1081 1082 if (strend == NULL) { 1083 /* fell off edge */ 1084 st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2); 1085 sf->current->str_open = '$'; 1086 sf->current->str_close = CHAR_NULL; 1087 return slen; 1088 } else { 1089 /* got one */ 1090 st_assign(sf->current, TYPE_STRING, pos+xlen+2, 1091 (size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2); 1092 sf->current->str_open = '$'; 1093 sf->current->str_close = '$'; 1094 return (size_t)((strend + xlen + 2) - cs); 1095 } 1096 } 1097 } else if (xlen == 1 && cs[pos + 1] == '.') { 1098 /* $. 
should parsed as a word */ 1099 return parse_word(sf); 1100 } else { 1101 st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos); 1102 return pos + 1 + xlen; 1103 } 1104 } 1105 1106 static size_t parse_number(struct libinjection_sqli_state * sf) 1107 { 1108 size_t xlen; 1109 size_t start; 1110 const char* digits = NULL; 1111 const char *cs = sf->s; 1112 const size_t slen = sf->slen; 1113 size_t pos = sf->pos; 1114 int have_e = 0; 1115 int have_exp = 0; 1116 1117 /* cs[pos] == '0' has 1/10 chance of being true, 1118 * while pos+1< slen is almost always true 1119 */ 1120 if (cs[pos] == '0' && pos + 1 < slen) { 1121 if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') { 1122 digits = "0123456789ABCDEFabcdef"; 1123 } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') { 1124 digits = "01"; 1125 } 1126 1127 if (digits) { 1128 xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits); 1129 if (xlen == 0) { 1130 st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos); 1131 return pos + 2; 1132 } else { 1133 st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos); 1134 return pos + 2 + xlen; 1135 } 1136 } 1137 } 1138 1139 start = pos; 1140 while (pos < slen && ISDIGIT(cs[pos])) { 1141 pos += 1; 1142 } 1143 1144 if (pos < slen && cs[pos] == '.') { 1145 pos += 1; 1146 while (pos < slen && ISDIGIT(cs[pos])) { 1147 pos += 1; 1148 } 1149 if (pos - start == 1) { 1150 /* only one character read so far */ 1151 st_assign_char(sf->current, TYPE_DOT, start, 1, '.'); 1152 return pos; 1153 } 1154 } 1155 1156 if (pos < slen) { 1157 if (cs[pos] == 'E' || cs[pos] == 'e') { 1158 have_e = 1; 1159 pos += 1; 1160 if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) { 1161 pos += 1; 1162 } 1163 while (pos < slen && ISDIGIT(cs[pos])) { 1164 have_exp = 1; 1165 pos += 1; 1166 } 1167 } 1168 } 1169 1170 /* oracle's ending float or double suffix 1171 * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891 1172 */ 1173 if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' 
|| cs[pos] == 'f' || cs[pos] == 'F')) { 1174 if (pos + 1 == slen) { 1175 /* line ends evaluate "... 1.2f$" as '1.2f' */ 1176 pos += 1; 1177 } else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) { 1178 /* 1179 * easy case, evaluate "... 1.2f ... as '1.2f' 1180 */ 1181 pos += 1; 1182 } else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') { 1183 /* 1184 * a bit of a hack but makes '1fUNION' parse as '1f UNION' 1185 */ 1186 pos += 1; 1187 } else { 1188 /* it's like "123FROM" */ 1189 /* parse as "123" only */ 1190 } 1191 } 1192 1193 if (have_e == 1 && have_exp == 0) { 1194 /* very special form of 1195 * "1234.e" 1196 * "10.10E" 1197 * ".E" 1198 * this is a WORD not a number!! */ 1199 st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start); 1200 } else { 1201 st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start); 1202 } 1203 return pos; 1204 } 1205 1206 /* 1207 * API to return version. This allows us to increment the version 1208 * without having to regenerated the SWIG (or other binding) in minor 1209 * releases. 
1210 */ 1211 const char* libinjection_version(void) 1212 { 1213 return LIBINJECTION_VERSION; 1214 } 1215 1216 int libinjection_sqli_tokenize(struct libinjection_sqli_state *sf) 1217 { 1218 pt2Function fnptr; 1219 size_t *pos = &sf->pos; 1220 stoken_t *current = sf->current; 1221 const char *s = sf->s; 1222 const size_t slen = sf->slen; 1223 1224 if (slen == 0) { 1225 return FALSE; 1226 } 1227 1228 st_clear(current); 1229 sf->current = current; 1230 1231 /* 1232 * if we are at beginning of string 1233 * and in single-quote or double quote mode 1234 * then pretend the input starts with a quote 1235 */ 1236 if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) { 1237 *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0); 1238 sf->stats_tokens += 1; 1239 return TRUE; 1240 } 1241 1242 while (*pos < slen) { 1243 1244 /* 1245 * get current character 1246 */ 1247 const unsigned char ch = (unsigned char) (s[*pos]); 1248 1249 /* 1250 * look up the parser, and call it 1251 * 1252 * Porting Note: this is mapping of char to function 1253 * charparsers[ch]() 1254 */ 1255 fnptr = char_parse_map[ch]; 1256 1257 *pos = (*fnptr) (sf); 1258 1259 /* 1260 * 1261 */ 1262 if (current->type != CHAR_NULL) { 1263 sf->stats_tokens += 1; 1264 return TRUE; 1265 } 1266 } 1267 return FALSE; 1268 } 1269 1270 void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags) 1271 { 1272 if (flags == 0) { 1273 flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI; 1274 } 1275 1276 memset(sf, 0, sizeof(struct libinjection_sqli_state)); 1277 sf->s = s; 1278 sf->slen = len; 1279 sf->lookup = libinjection_sqli_lookup_word; 1280 sf->userdata = 0; 1281 sf->flags = flags; 1282 sf->current = &(sf->tokenvec[0]); 1283 } 1284 1285 void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags) 1286 { 1287 void *userdata = sf->userdata; 1288 ptr_lookup_fn lookup = sf->lookup; 1289 1290 if (flags == 0) { 1291 flags = FLAG_QUOTE_NONE | 
FLAG_SQL_ANSI; 1292 } 1293 libinjection_sqli_init(sf, sf->s, sf->slen, flags); 1294 sf->lookup = lookup; 1295 sf->userdata = userdata; 1296 } 1297 1298 void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata) 1299 { 1300 if (fn == NULL) { 1301 sf->lookup = libinjection_sqli_lookup_word; 1302 sf->userdata = (void*)(NULL); 1303 } else { 1304 sf->lookup = fn; 1305 sf->userdata = userdata; 1306 } 1307 } 1308 1309 /** See if two tokens can be merged since they are compound SQL phrases. 1310 * 1311 * This takes two tokens, and, if they are the right type, 1312 * merges their values together. Then checks to see if the 1313 * new value is special using the PHRASES mapping. 1314 * 1315 * Example: "UNION" + "ALL" ==> "UNION ALL" 1316 * 1317 * C Security Notes: this is safe to use C-strings (null-terminated) 1318 * since the types involved by definition do not have embedded nulls 1319 * (e.g. there is no keyword with embedded null) 1320 * 1321 * Porting Notes: since this is C, it's oddly complicated. 1322 * This is just: multikeywords[token.value + ' ' + token2.value] 1323 * 1324 */ 1325 static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b) 1326 { 1327 size_t sz1; 1328 size_t sz2; 1329 size_t sz3; 1330 char tmp[LIBINJECTION_SQLI_TOKEN_SIZE]; 1331 char ch; 1332 1333 /* first token is of right type? */ 1334 if (! 1335 (a->type == TYPE_KEYWORD || 1336 a->type == TYPE_BAREWORD || 1337 a->type == TYPE_OPERATOR || 1338 a->type == TYPE_UNION || 1339 a->type == TYPE_FUNCTION || 1340 a->type == TYPE_EXPRESSION || 1341 a->type == TYPE_TSQL || 1342 a->type == TYPE_SQLTYPE)) { 1343 return FALSE; 1344 } 1345 1346 if (! 
1347 (b->type == TYPE_KEYWORD || 1348 b->type == TYPE_BAREWORD || 1349 b->type == TYPE_OPERATOR || 1350 b->type == TYPE_UNION || 1351 b->type == TYPE_FUNCTION || 1352 b->type == TYPE_EXPRESSION || 1353 b->type == TYPE_TSQL || 1354 b->type == TYPE_SQLTYPE || 1355 b->type == TYPE_LOGIC_OPERATOR)) { 1356 return FALSE; 1357 } 1358 1359 sz1 = a->len; 1360 sz2 = b->len; 1361 sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */ 1362 if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */ 1363 return FALSE; 1364 } 1365 /* 1366 * oddly annoying last.val + ' ' + current.val 1367 */ 1368 memcpy(tmp, a->val, sz1); 1369 tmp[sz1] = ' '; 1370 memcpy(tmp + sz1 + 1, b->val, sz2); 1371 tmp[sz3] = CHAR_NULL; 1372 ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3); 1373 1374 if (ch != CHAR_NULL) { 1375 st_assign(a, ch, a->pos, sz3, tmp); 1376 return TRUE; 1377 } else { 1378 return FALSE; 1379 } 1380 } 1381 1382 int libinjection_sqli_fold(struct libinjection_sqli_state * sf) 1383 { 1384 stoken_t last_comment; 1385 1386 /* POS is the position of where the NEXT token goes */ 1387 size_t pos = 0; 1388 1389 /* LEFT is a count of how many tokens that are already 1390 folded or processed (i.e. part of the fingerprint) */ 1391 size_t left = 0; 1392 1393 int more = 1; 1394 1395 st_clear(&last_comment); 1396 1397 /* Skip all initial comments, right-parens ( and unary operators 1398 * 1399 */ 1400 sf->current = &(sf->tokenvec[0]); 1401 while (more) { 1402 more = libinjection_sqli_tokenize(sf); 1403 if ( ! (sf->current->type == TYPE_COMMENT || 1404 sf->current->type == TYPE_LEFTPARENS || 1405 sf->current->type == TYPE_SQLTYPE || 1406 st_is_unary_op(sf->current))) { 1407 break; 1408 } 1409 } 1410 1411 if (! more) { 1412 /* If input was only comments, unary or (, then exit */ 1413 return 0; 1414 } else { 1415 /* it's some other token */ 1416 pos += 1; 1417 } 1418 1419 while (1) { 1420 FOLD_DEBUG; 1421 1422 /* do we have all the max number of tokens? 
if so do 1423 * some special cases for 5 tokens 1424 */ 1425 if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) { 1426 if ( 1427 ( 1428 sf->tokenvec[0].type == TYPE_NUMBER && 1429 (sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) && 1430 sf->tokenvec[2].type == TYPE_LEFTPARENS && 1431 sf->tokenvec[3].type == TYPE_NUMBER && 1432 sf->tokenvec[4].type == TYPE_RIGHTPARENS 1433 ) || 1434 ( 1435 sf->tokenvec[0].type == TYPE_BAREWORD && 1436 sf->tokenvec[1].type == TYPE_OPERATOR && 1437 sf->tokenvec[2].type == TYPE_LEFTPARENS && 1438 (sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) && 1439 sf->tokenvec[4].type == TYPE_RIGHTPARENS 1440 ) || 1441 ( 1442 sf->tokenvec[0].type == TYPE_NUMBER && 1443 sf->tokenvec[1].type == TYPE_RIGHTPARENS && 1444 sf->tokenvec[2].type == TYPE_COMMA && 1445 sf->tokenvec[3].type == TYPE_LEFTPARENS && 1446 sf->tokenvec[4].type == TYPE_NUMBER 1447 ) || 1448 ( 1449 sf->tokenvec[0].type == TYPE_BAREWORD && 1450 sf->tokenvec[1].type == TYPE_RIGHTPARENS && 1451 sf->tokenvec[2].type == TYPE_OPERATOR && 1452 sf->tokenvec[3].type == TYPE_LEFTPARENS && 1453 sf->tokenvec[4].type == TYPE_BAREWORD 1454 ) 1455 ) 1456 { 1457 if (pos > LIBINJECTION_SQLI_MAX_TOKENS) { 1458 st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS])); 1459 pos = 2; 1460 left = 0; 1461 } else { 1462 pos = 1; 1463 left = 0; 1464 } 1465 } 1466 } 1467 1468 if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) { 1469 left = pos; 1470 break; 1471 } 1472 1473 /* get up to two tokens */ 1474 while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) { 1475 sf->current = &(sf->tokenvec[pos]); 1476 more = libinjection_sqli_tokenize(sf); 1477 if (more) { 1478 if (sf->current->type == TYPE_COMMENT) { 1479 st_copy(&last_comment, sf->current); 1480 } else { 1481 last_comment.type = CHAR_NULL; 1482 pos += 1; 1483 } 1484 } 1485 } 1486 FOLD_DEBUG; 1487 /* did we get 2 tokens? 
if not then we are done */ 1488 if (pos - left < 2) { 1489 left = pos; 1490 continue; 1491 } 1492 1493 /* FOLD: "ss" -> "s" 1494 * "foo" "bar" is valid SQL 1495 * just ignore second string 1496 */ 1497 if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) { 1498 pos -= 1; 1499 sf->stats_folds += 1; 1500 continue; 1501 } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) { 1502 /* not sure how various engines handle 1503 * 'select 1;;drop table foo' or 1504 * 'select 1; /x foo x/; drop table foo' 1505 * to prevent surprises, just fold away repeated semicolons 1506 */ 1507 pos -= 1; 1508 sf->stats_folds += 1; 1509 continue; 1510 } else if ((sf->tokenvec[left].type == TYPE_OPERATOR || 1511 sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) && 1512 (st_is_unary_op(&sf->tokenvec[left+1]) || 1513 sf->tokenvec[left+1].type == TYPE_SQLTYPE)) { 1514 pos -= 1; 1515 sf->stats_folds += 1; 1516 left = 0; 1517 continue; 1518 } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS && 1519 st_is_unary_op(&sf->tokenvec[left+1])) { 1520 pos -= 1; 1521 sf->stats_folds += 1; 1522 if (left > 0) { 1523 left -= 1; 1524 } 1525 continue; 1526 } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) { 1527 pos -= 1; 1528 sf->stats_folds += 1; 1529 if (left > 0) { 1530 left -= 1; 1531 } 1532 continue; 1533 } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && 1534 sf->tokenvec[left+1].type == TYPE_FUNCTION && 1535 (sf->tokenvec[left+1].val[0] == 'I' || 1536 sf->tokenvec[left+1].val[0] == 'i' ) && 1537 (sf->tokenvec[left+1].val[1] == 'F' || 1538 sf->tokenvec[left+1].val[1] == 'f' )) { 1539 /* IF is normally a function, except in Transact-SQL where it can be used as a 1540 * standalone control flow operator, e.g. ; IF 1=1 ... 
1541 * if found after a semicolon, convert from 'f' type to 'T' type 1542 */ 1543 sf->tokenvec[left+1].type = TYPE_TSQL; 1544 /* left += 2; */ 1545 continue; /* reparse everything, but we probably can advance left, and pos */ 1546 } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) && 1547 sf->tokenvec[left+1].type == TYPE_LEFTPARENS && ( 1548 /* TSQL functions but common enough to be column names */ 1549 cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1550 cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1551 1552 /* Function in MYSQL */ 1553 cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1554 cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1555 cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1556 1557 /* Mysql words that act as a variable and are a function */ 1558 1559 /* TSQL current_users is fake-variable */ 1560 /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */ 1561 cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1562 cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1563 cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1564 cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1565 cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1566 cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 1567 )) { 1568 1569 /* pos is the same 1570 * other conversions need to go here... 
for instance 1571 * password CAN be a function, coalesce CAN be a function 1572 */ 1573 sf->tokenvec[left].type = TYPE_FUNCTION; 1574 continue; 1575 } else if (sf->tokenvec[left].type == TYPE_KEYWORD && ( 1576 cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1577 cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 1578 )) { 1579 1580 if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) { 1581 /* got .... IN ( ... (or 'NOT IN') 1582 * it's an operator 1583 */ 1584 sf->tokenvec[left].type = TYPE_OPERATOR; 1585 } else { 1586 /* 1587 * it's a nothing 1588 */ 1589 sf->tokenvec[left].type = TYPE_BAREWORD; 1590 } 1591 1592 /* "IN" can be used as "IN BOOLEAN MODE" for mysql 1593 * in which case merging of words can be done later 1594 * other wise it acts as an equality operator __ IN (values..) 1595 * 1596 * here we got "IN" "(" so it's an operator. 1597 * also back track to handle "NOT IN" 1598 * might need to do the same with like 1599 * two use cases "foo" LIKE "BAR" (normal operator) 1600 * "foo" = LIKE(1,2) 1601 */ 1602 continue; 1603 } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && ( 1604 cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || 1605 cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) { 1606 if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) { 1607 /* SELECT LIKE(... 
1608 * it's a function 1609 */ 1610 sf->tokenvec[left].type = TYPE_FUNCTION; 1611 } 1612 } else if (sf->tokenvec[left].type == TYPE_SQLTYPE && 1613 (sf->tokenvec[left+1].type == TYPE_BAREWORD || 1614 sf->tokenvec[left+1].type == TYPE_NUMBER || 1615 sf->tokenvec[left+1].type == TYPE_SQLTYPE || 1616 sf->tokenvec[left+1].type == TYPE_LEFTPARENS || 1617 sf->tokenvec[left+1].type == TYPE_FUNCTION || 1618 sf->tokenvec[left+1].type == TYPE_VARIABLE || 1619 sf->tokenvec[left+1].type == TYPE_STRING)) { 1620 st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]); 1621 pos -= 1; 1622 sf->stats_folds += 1; 1623 left = 0; 1624 continue; 1625 } else if (sf->tokenvec[left].type == TYPE_COLLATE && 1626 sf->tokenvec[left+1].type == TYPE_BAREWORD) { 1627 /* 1628 * there are too many collation types.. so if the bareword has a "_" 1629 * then it's TYPE_SQLTYPE 1630 */ 1631 if (strchr(sf->tokenvec[left+1].val, '_') != NULL) { 1632 sf->tokenvec[left+1].type = TYPE_SQLTYPE; 1633 left = 0; 1634 } 1635 } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) { 1636 if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) { 1637 /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */ 1638 sf->tokenvec[left].type = TYPE_NUMBER; 1639 } else { 1640 /* just ignore it.. 
Again T-SQL seems to parse \1 as "1" */ 1641 st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]); 1642 pos -= 1; 1643 sf->stats_folds += 1; 1644 } 1645 left = 0; 1646 continue; 1647 } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS && 1648 sf->tokenvec[left+1].type == TYPE_LEFTPARENS) { 1649 pos -= 1; 1650 left = 0; 1651 sf->stats_folds += 1; 1652 continue; 1653 } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS && 1654 sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) { 1655 pos -= 1; 1656 left = 0; 1657 sf->stats_folds += 1; 1658 continue; 1659 } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE && 1660 sf->tokenvec[left+1].type == TYPE_BAREWORD) { 1661 1662 /* 1663 * MySQL Degenerate case -- 1664 * 1665 * select { ``.``.id }; -- valid !!! 1666 * select { ``.``.``.id }; -- invalid 1667 * select ``.``.id; -- invalid 1668 * select { ``.id }; -- invalid 1669 * 1670 * so it appears {``.``.id} is a magic case 1671 * I suspect this is "current database, current table, field id" 1672 * 1673 * The folding code can't look at more than 3 tokens, and 1674 * I don't want to make two passes. 1675 * 1676 * Since "{ ``" so rare, we are just going to blacklist it. 1677 * 1678 * Highly likely this will need revisiting! 1679 * 1680 * CREDIT @rsalgado 2013-11-25 1681 */ 1682 if (sf->tokenvec[left+1].len == 0) { 1683 sf->tokenvec[left+1].type = TYPE_EVIL; 1684 return (int)(left+2); 1685 } 1686 /* weird ODBC / MYSQL {foo expr} --> expr 1687 * but for this rule we just strip away the "{ foo" part 1688 */ 1689 left = 0; 1690 pos -= 2; 1691 sf->stats_folds += 2; 1692 continue; 1693 } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) { 1694 pos -= 1; 1695 left = 0; 1696 sf->stats_folds += 1; 1697 continue; 1698 } 1699 1700 /* all cases of handing 2 tokens is done 1701 and nothing matched. 
Get one more token 1702 */ 1703 FOLD_DEBUG; 1704 while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) { 1705 sf->current = &(sf->tokenvec[pos]); 1706 more = libinjection_sqli_tokenize(sf); 1707 if (more) { 1708 if (sf->current->type == TYPE_COMMENT) { 1709 st_copy(&last_comment, sf->current); 1710 } else { 1711 last_comment.type = CHAR_NULL; 1712 pos += 1; 1713 } 1714 } 1715 } 1716 1717 /* do we have three tokens? If not then we are done */ 1718 if (pos -left < 3) { 1719 left = pos; 1720 continue; 1721 } 1722 1723 /* 1724 * now look for three token folding 1725 */ 1726 if (sf->tokenvec[left].type == TYPE_NUMBER && 1727 sf->tokenvec[left+1].type == TYPE_OPERATOR && 1728 sf->tokenvec[left+2].type == TYPE_NUMBER) { 1729 pos -= 2; 1730 left = 0; 1731 continue; 1732 } else if (sf->tokenvec[left].type == TYPE_OPERATOR && 1733 sf->tokenvec[left+1].type != TYPE_LEFTPARENS && 1734 sf->tokenvec[left+2].type == TYPE_OPERATOR) { 1735 left = 0; 1736 pos -= 2; 1737 continue; 1738 } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR && 1739 sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) { 1740 pos -= 2; 1741 left = 0; 1742 continue; 1743 } else if (sf->tokenvec[left].type == TYPE_VARIABLE && 1744 sf->tokenvec[left+1].type == TYPE_OPERATOR && 1745 (sf->tokenvec[left+2].type == TYPE_VARIABLE || 1746 sf->tokenvec[left+2].type == TYPE_NUMBER || 1747 sf->tokenvec[left+2].type == TYPE_BAREWORD)) { 1748 pos -= 2; 1749 left = 0; 1750 continue; 1751 } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || 1752 sf->tokenvec[left].type == TYPE_NUMBER ) && 1753 sf->tokenvec[left+1].type == TYPE_OPERATOR && 1754 (sf->tokenvec[left+2].type == TYPE_NUMBER || 1755 sf->tokenvec[left+2].type == TYPE_BAREWORD)) { 1756 pos -= 2; 1757 left = 0; 1758 continue; 1759 } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || 1760 sf->tokenvec[left].type == TYPE_NUMBER || 1761 sf->tokenvec[left].type == TYPE_VARIABLE || 1762 sf->tokenvec[left].type == TYPE_STRING) && 1763 
sf->tokenvec[left+1].type == TYPE_OPERATOR && 1764 streq(sf->tokenvec[left+1].val, "::") && 1765 sf->tokenvec[left+2].type == TYPE_SQLTYPE) { 1766 pos -= 2; 1767 left = 0; 1768 sf->stats_folds += 2; 1769 continue; 1770 } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || 1771 sf->tokenvec[left].type == TYPE_NUMBER || 1772 sf->tokenvec[left].type == TYPE_STRING || 1773 sf->tokenvec[left].type == TYPE_VARIABLE) && 1774 sf->tokenvec[left+1].type == TYPE_COMMA && 1775 (sf->tokenvec[left+2].type == TYPE_NUMBER || 1776 sf->tokenvec[left+2].type == TYPE_BAREWORD || 1777 sf->tokenvec[left+2].type == TYPE_STRING || 1778 sf->tokenvec[left+2].type == TYPE_VARIABLE)) { 1779 pos -= 2; 1780 left = 0; 1781 continue; 1782 } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION || 1783 sf->tokenvec[left].type == TYPE_GROUP || 1784 sf->tokenvec[left].type == TYPE_COMMA) && 1785 st_is_unary_op(&sf->tokenvec[left+1]) && 1786 sf->tokenvec[left+2].type == TYPE_LEFTPARENS) { 1787 /* got something like SELECT + (, LIMIT + ( 1788 * remove unary operator 1789 */ 1790 st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); 1791 pos -= 1; 1792 left = 0; 1793 continue; 1794 } else if ((sf->tokenvec[left].type == TYPE_KEYWORD || 1795 sf->tokenvec[left].type == TYPE_EXPRESSION || 1796 sf->tokenvec[left].type == TYPE_GROUP ) && 1797 st_is_unary_op(&sf->tokenvec[left+1]) && 1798 (sf->tokenvec[left+2].type == TYPE_NUMBER || 1799 sf->tokenvec[left+2].type == TYPE_BAREWORD || 1800 sf->tokenvec[left+2].type == TYPE_VARIABLE || 1801 sf->tokenvec[left+2].type == TYPE_STRING || 1802 sf->tokenvec[left+2].type == TYPE_FUNCTION )) { 1803 /* remove unary operators 1804 * select - 1 1805 */ 1806 st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); 1807 pos -= 1; 1808 left = 0; 1809 continue; 1810 } else if (sf->tokenvec[left].type == TYPE_COMMA && 1811 st_is_unary_op(&sf->tokenvec[left+1]) && 1812 (sf->tokenvec[left+2].type == TYPE_NUMBER || 1813 sf->tokenvec[left+2].type == TYPE_BAREWORD || 1814 
sf->tokenvec[left+2].type == TYPE_VARIABLE || 1815 sf->tokenvec[left+2].type == TYPE_STRING)) { 1816 /* 1817 * interesting case turn ", -1" ->> ",1" PLUS we need to back up 1818 * one token if possible to see if more folding can be done 1819 * "1,-1" --> "1" 1820 */ 1821 st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); 1822 left = 0; 1823 /* pos is >= 3 so this is safe */ 1824 assert(pos >= 3); 1825 pos -= 3; 1826 continue; 1827 } else if (sf->tokenvec[left].type == TYPE_COMMA && 1828 st_is_unary_op(&sf->tokenvec[left+1]) && 1829 sf->tokenvec[left+2].type == TYPE_FUNCTION) { 1830 1831 /* Separate case from above since you end up with 1832 * 1,-sin(1) --> 1 (1) 1833 * Here, just do 1834 * 1,-sin(1) --> 1,sin(1) 1835 * just remove unary operator 1836 */ 1837 st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); 1838 pos -= 1; 1839 left = 0; 1840 continue; 1841 } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) && 1842 (sf->tokenvec[left+1].type == TYPE_DOT) && 1843 (sf->tokenvec[left+2].type == TYPE_BAREWORD)) { 1844 /* ignore the '.n' 1845 * typically is this databasename.table 1846 */ 1847 assert(pos >= 3); 1848 pos -= 2; 1849 left = 0; 1850 continue; 1851 } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) && 1852 (sf->tokenvec[left+1].type == TYPE_DOT) && 1853 (sf->tokenvec[left+2].type == TYPE_BAREWORD)) { 1854 /* select . `foo` --> select `foo` */ 1855 st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); 1856 pos -= 1; 1857 left = 0; 1858 continue; 1859 } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) && 1860 (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) && 1861 (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) { 1862 /* 1863 * whats going on here 1864 * Some SQL functions like USER() have 0 args 1865 * if we get User(foo), then User is not a function 1866 * This should be expanded since it eliminated a lot of false 1867 * positives. 
1868 */ 1869 if (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) { 1870 sf->tokenvec[left].type = TYPE_BAREWORD; 1871 } 1872 } 1873 1874 /* no folding -- assume left-most token is 1875 is good, now use the existing 2 tokens -- 1876 do not get another 1877 */ 1878 1879 left += 1; 1880 1881 } /* while(1) */ 1882 1883 /* if we have 4 or less tokens, and we had a comment token 1884 * at the end, add it back 1885 */ 1886 1887 if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) { 1888 st_copy(&sf->tokenvec[left], &last_comment); 1889 left += 1; 1890 } 1891 1892 /* sometimes we grab a 6th token to help 1893 determine the type of token 5. 1894 */ 1895 if (left > LIBINJECTION_SQLI_MAX_TOKENS) { 1896 left = LIBINJECTION_SQLI_MAX_TOKENS; 1897 } 1898 1899 return (int)left; 1900 } 1901 1902 /* secondary api: detects SQLi in a string, GIVEN a context. 1903 * 1904 * A context can be: 1905 * * CHAR_NULL (\0), process as is 1906 * * CHAR_SINGLE ('), process pretending input started with a 1907 * single quote. 1908 * * CHAR_DOUBLE ("), process pretending input started with a 1909 * double quote. 1910 * 1911 */ 1912 const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags) 1913 { 1914 int i; 1915 int tlen = 0; 1916 1917 libinjection_sqli_reset(sql_state, flags); 1918 1919 tlen = libinjection_sqli_fold(sql_state); 1920 1921 /* Check for magic PHP backquote comment 1922 * If: 1923 * * last token is of type "bareword" 1924 * * And is quoted in a backtick 1925 * * And isn't closed 1926 * * And it's empty? 
1927 * Then convert it to comment 1928 */ 1929 if (tlen > 2 && 1930 sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD && 1931 sql_state->tokenvec[tlen-1].str_open == CHAR_TICK && 1932 sql_state->tokenvec[tlen-1].len == 0 && 1933 sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) { 1934 sql_state->tokenvec[tlen-1].type = TYPE_COMMENT; 1935 } 1936 1937 for (i = 0; i < tlen; ++i) { 1938 sql_state->fingerprint[i] = sql_state->tokenvec[i].type; 1939 } 1940 1941 /* 1942 * make the fingerprint pattern a c-string (null delimited) 1943 */ 1944 sql_state->fingerprint[tlen] = CHAR_NULL; 1945 1946 /* 1947 * check for 'X' in pattern, and then 1948 * clear out all tokens 1949 * 1950 * this means parsing could not be done 1951 * accurately due to pgsql's double comments 1952 * or other syntax that isn't consistent. 1953 * Should be very rare false positive 1954 */ 1955 if (strchr(sql_state->fingerprint, TYPE_EVIL)) { 1956 /* needed for SWIG */ 1957 memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1); 1958 memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE); 1959 1960 sql_state->fingerprint[0] = TYPE_EVIL; 1961 1962 sql_state->tokenvec[0].type = TYPE_EVIL; 1963 sql_state->tokenvec[0].val[0] = TYPE_EVIL; 1964 sql_state->tokenvec[1].type = CHAR_NULL; 1965 } 1966 1967 1968 return sql_state->fingerprint; 1969 } 1970 1971 int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state) 1972 { 1973 return libinjection_sqli_blacklist(sql_state) && 1974 libinjection_sqli_not_whitelist(sql_state); 1975 } 1976 1977 char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type, 1978 const char* str, size_t len) 1979 { 1980 if (lookup_type == LOOKUP_FINGERPRINT) { 1981 return libinjection_sqli_check_fingerprint(sql_state) ? 
'X' : '\0'; 1982 } else { 1983 return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz); 1984 } 1985 } 1986 1987 int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state) 1988 { 1989 /* 1990 * use minimum of 8 bytes to make sure gcc -fstack-protector 1991 * works correctly 1992 */ 1993 char fp2[8]; 1994 char ch; 1995 size_t i; 1996 size_t len = strlen(sql_state->fingerprint); 1997 int patmatch; 1998 1999 if (len < 1) { 2000 sql_state->reason = __LINE__; 2001 return FALSE; 2002 } 2003 2004 /* 2005 to keep everything compatible, convert the 2006 v0 fingerprint pattern to v1 2007 v0: up to 5 chars, mixed case 2008 v1: 1 char is '0', up to 5 more chars, upper case 2009 */ 2010 2011 fp2[0] = '0'; 2012 for (i = 0; i < len; ++i) { 2013 ch = sql_state->fingerprint[i]; 2014 if (ch >= 'a' && ch <= 'z') { 2015 ch -= 0x20; 2016 } 2017 fp2[i+1] = ch; 2018 } 2019 fp2[i+1] = '\0'; 2020 2021 patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT; 2022 2023 /* 2024 * No match. 2025 * 2026 * Set sql_state->reason to current line number 2027 * only for debugging purposes. 2028 */ 2029 if (!patmatch) { 2030 sql_state->reason = __LINE__; 2031 return FALSE; 2032 } 2033 2034 return TRUE; 2035 } 2036 2037 /* 2038 * return TRUE if SQLi, false is benign 2039 */ 2040 int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state) 2041 { 2042 /* 2043 * We assume we got a SQLi match 2044 * This next part just helps reduce false positives. 2045 * 2046 */ 2047 char ch; 2048 size_t tlen = strlen(sql_state->fingerprint); 2049 2050 if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) { 2051 /* 2052 * if ending comment is contains 'sp_password' then it's SQLi! 2053 * MS Audit log apparently ignores anything with 2054 * 'sp_password' in it. 
Unable to find primary reference to 2055 * this "feature" of SQL Server but seems to be known SQLi 2056 * technique 2057 */ 2058 if (my_memmem(sql_state->s, sql_state->slen, 2059 "sp_password", strlen("sp_password"))) { 2060 sql_state->reason = __LINE__; 2061 return TRUE; 2062 } 2063 } 2064 2065 switch (tlen) { 2066 case 2:{ 2067 /* 2068 * case 2 are "very small SQLi" which make them 2069 * hard to tell from normal input... 2070 */ 2071 2072 if (sql_state->fingerprint[1] == TYPE_UNION) { 2073 if (sql_state->stats_tokens == 2) { 2074 /* not sure why but 1U comes up in SQLi attack 2075 * likely part of parameter splitting/etc. 2076 * lots of reasons why "1 union" might be normal 2077 * input, so beep only if other SQLi things are present 2078 */ 2079 /* it really is a number and 'union' 2080 * other wise it has folding or comments 2081 */ 2082 sql_state->reason = __LINE__; 2083 return FALSE; 2084 } else { 2085 sql_state->reason = __LINE__; 2086 return TRUE; 2087 } 2088 } 2089 /* 2090 * if 'comment' is '#' ignore.. too many FP 2091 */ 2092 if (sql_state->tokenvec[1].val[0] == '#') { 2093 sql_state->reason = __LINE__; 2094 return FALSE; 2095 } 2096 2097 /* 2098 * for fingerprint like 'nc', only comments of /x are treated 2099 * as SQL... ending comments of "--" and "#" are not SQLi 2100 */ 2101 if (sql_state->tokenvec[0].type == TYPE_BAREWORD && 2102 sql_state->tokenvec[1].type == TYPE_COMMENT && 2103 sql_state->tokenvec[1].val[0] != '/') { 2104 sql_state->reason = __LINE__; 2105 return FALSE; 2106 } 2107 2108 /* 2109 * if '1c' ends with '/x' then it's SQLi 2110 */ 2111 if (sql_state->tokenvec[0].type == TYPE_NUMBER && 2112 sql_state->tokenvec[1].type == TYPE_COMMENT && 2113 sql_state->tokenvec[1].val[0] == '/') { 2114 return TRUE; 2115 } 2116 2117 /** 2118 * there are some odd base64-looking query string values 2119 * 1234-ABCDEFEhfhihwuefi-- 2120 * which evaluate to "1c"... these are not SQLi 2121 * but 1234-- probably is. 
2122 * Make sure the "1" in "1c" is actually a true decimal number 2123 * 2124 * Need to check -original- string since the folding step 2125 * may have merged tokens, e.g. "1+FOO" is folded into "1" 2126 * 2127 * Note: evasion: 1*1-- 2128 */ 2129 if (sql_state->tokenvec[0].type == TYPE_NUMBER && 2130 sql_state->tokenvec[1].type == TYPE_COMMENT) { 2131 if (sql_state->stats_tokens > 2) { 2132 /* we have some folding going on, highly likely SQLi */ 2133 sql_state->reason = __LINE__; 2134 return TRUE; 2135 } 2136 /* 2137 * we check that next character after the number is either whitespace, 2138 * or '/' or a '-' ==> SQLi. 2139 */ 2140 ch = sql_state->s[sql_state->tokenvec[0].len]; 2141 if ( ch <= 32 ) { 2142 /* next char was whitespace,e.g. "1234 --" 2143 * this isn't exactly correct.. ideally we should skip over all whitespace 2144 * but this seems to be ok for now 2145 */ 2146 return TRUE; 2147 } 2148 if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') { 2149 return TRUE; 2150 } 2151 if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') { 2152 return TRUE; 2153 } 2154 2155 sql_state->reason = __LINE__; 2156 return FALSE; 2157 } 2158 2159 /* 2160 * detect obvious SQLi scans.. many people put '--' in plain text 2161 * so only detect if input ends with '--', e.g. 1-- but not 1-- foo 2162 */ 2163 if ((sql_state->tokenvec[1].len > 2) 2164 && sql_state->tokenvec[1].val[0] == '-') { 2165 sql_state->reason = __LINE__; 2166 return FALSE; 2167 } 2168 2169 break; 2170 } /* case 2 */ 2171 case 3:{ 2172 /* 2173 * ...foo' + 'bar... 2174 * no opening quote, no closing quote 2175 * and each string has data 2176 */ 2177 2178 if (streq(sql_state->fingerprint, "sos") 2179 || streq(sql_state->fingerprint, "s&s")) { 2180 2181 if ((sql_state->tokenvec[0].str_open == CHAR_NULL) 2182 && (sql_state->tokenvec[2].str_close == CHAR_NULL) 2183 && (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) { 2184 /* 2185 * if ....foo" + "bar.... 
2186 */ 2187 sql_state->reason = __LINE__; 2188 return TRUE; 2189 } 2190 if (sql_state->stats_tokens == 3) { 2191 sql_state->reason = __LINE__; 2192 return FALSE; 2193 } 2194 2195 /* 2196 * not SQLi 2197 */ 2198 sql_state->reason = __LINE__; 2199 return FALSE; 2200 } else if (streq(sql_state->fingerprint, "s&n") || 2201 streq(sql_state->fingerprint, "n&1") || 2202 streq(sql_state->fingerprint, "1&1") || 2203 streq(sql_state->fingerprint, "1&v") || 2204 streq(sql_state->fingerprint, "1&s")) { 2205 /* 'sexy and 17' not SQLi 2206 * 'sexy and 17<18' SQLi 2207 */ 2208 if (sql_state->stats_tokens == 3) { 2209 sql_state->reason = __LINE__; 2210 return FALSE; 2211 } 2212 } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) { 2213 if ((sql_state->tokenvec[1].len < 5) || 2214 cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) { 2215 /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL) 2216 * then treat as safe 2217 */ 2218 sql_state->reason = __LINE__; 2219 return FALSE; 2220 } 2221 } 2222 break; 2223 } /* case 3 */ 2224 case 4: 2225 case 5: { 2226 /* nothing right now */ 2227 break; 2228 } /* case 5 */ 2229 } /* end switch */ 2230 2231 return TRUE; 2232 } 2233 2234 /** Main API, detects SQLi in an input. 2235 * 2236 * 2237 */ 2238 static int reparse_as_mysql(struct libinjection_sqli_state * sql_state) 2239 { 2240 return sql_state->stats_comment_ddx || 2241 sql_state->stats_comment_hash; 2242 } 2243 2244 /* 2245 * This function is mostly use with SWIG 2246 */ 2247 struct libinjection_sqli_token* 2248 libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i) 2249 { 2250 if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) { 2251 return NULL; 2252 } 2253 return &(sql_state->tokenvec[i]); 2254 } 2255 2256 int libinjection_is_sqli(struct libinjection_sqli_state * sql_state) 2257 { 2258 const char *s = sql_state->s; 2259 size_t slen = sql_state->slen; 2260 2261 /* 2262 * no input? 
not SQLi 2263 */ 2264 if (slen == 0) { 2265 return FALSE; 2266 } 2267 2268 /* 2269 * test input "as-is" 2270 */ 2271 libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI); 2272 if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT, 2273 sql_state->fingerprint, strlen(sql_state->fingerprint))) { 2274 return TRUE; 2275 } else if (reparse_as_mysql(sql_state)) { 2276 libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL); 2277 if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT, 2278 sql_state->fingerprint, strlen(sql_state->fingerprint))) { 2279 return TRUE; 2280 } 2281 } 2282 2283 /* 2284 * if input has a single_quote, then 2285 * test as if input was actually ' 2286 * example: if input if "1' = 1", then pretend it's 2287 * "'1' = 1" 2288 * Porting Notes: example the same as doing 2289 * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg) 2290 * 2291 */ 2292 if (memchr(s, CHAR_SINGLE, slen)) { 2293 libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI); 2294 if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT, 2295 sql_state->fingerprint, strlen(sql_state->fingerprint))) { 2296 return TRUE; 2297 } else if (reparse_as_mysql(sql_state)) { 2298 libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL); 2299 if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT, 2300 sql_state->fingerprint, strlen(sql_state->fingerprint))) { 2301 return TRUE; 2302 } 2303 } 2304 } 2305 2306 /* 2307 * same as above but with a double-quote " 2308 */ 2309 if (memchr(s, CHAR_DOUBLE, slen)) { 2310 libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL); 2311 if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT, 2312 sql_state->fingerprint, strlen(sql_state->fingerprint))) { 2313 return TRUE; 2314 } 2315 } 2316 2317 /* 2318 * Hurray, input is not SQLi 2319 */ 2320 return FALSE; 2321 } 2322 2323 int libinjection_sqli(const char* s, size_t slen, char fingerprint[]) 2324 { 2325 int issqli; 2326 
struct libinjection_sqli_state state; 2327 2328 libinjection_sqli_init(&state, s, slen, 0); 2329 issqli = libinjection_is_sqli(&state); 2330 if (issqli) { 2331 strcpy(fingerprint, state.fingerprint); 2332 } else { 2333 fingerprint[0] = '\0'; 2334 } 2335 return issqli; 2336 }