github.com/moontrade/nogc@v0.1.7/net/http/picohttpparser.c (about) 1 /* 2 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, 3 * Shigeo Mitsunari 4 * 5 * The software is licensed under either the MIT License (below) or the Perl 6 * license. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a copy 9 * of this software and associated documentation files (the "Software"), to 10 * deal in the Software without restriction, including without limitation the 11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 12 * sell copies of the Software, and to permit persons to whom the Software is 13 * furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in 16 * all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 * IN THE SOFTWARE. 25 */ 26 27 #include <assert.h> 28 #include <stddef.h> 29 #include <string.h> 30 #ifdef __SSE4_2__ 31 #ifdef _MSC_VER 32 #include <nmmintrin.h> 33 #else 34 #include <x86intrin.h> 35 #endif 36 #endif 37 #include "picohttpparser.h" 38 39 #if __GNUC__ >= 3 40 #define likely(x) __builtin_expect(!!(x), 1) 41 #define unlikely(x) __builtin_expect(!!(x), 0) 42 #else 43 #define likely(x) (x) 44 #define unlikely(x) (x) 45 #endif 46 47 #ifdef _MSC_VER 48 #define ALIGNED(n) _declspec(align(n)) 49 #else 50 #define ALIGNED(n) __attribute__((aligned(n))) 51 #endif 52 53 #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) 54 55 #define CHECK_EOF() \ 56 if (buf == buf_end) { \ 57 *ret = -2; \ 58 return NULL; \ 59 } 60 61 #define EXPECT_CHAR_NO_CHECK(ch) \ 62 if (*buf++ != ch) { \ 63 *ret = -1; \ 64 return NULL; \ 65 } 66 67 #define EXPECT_CHAR(ch) \ 68 CHECK_EOF(); \ 69 EXPECT_CHAR_NO_CHECK(ch); 70 71 #define ADVANCE_TOKEN(tok, toklen) \ 72 do { \ 73 const char *tok_start = buf; \ 74 static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \ 75 int found2; \ 76 buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \ 77 if (!found2) { \ 78 CHECK_EOF(); \ 79 } \ 80 while (1) { \ 81 if (*buf == ' ') { \ 82 break; \ 83 } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ 84 if ((unsigned char)*buf < '\040' || *buf == '\177') { \ 85 *ret = -1; \ 86 return NULL; \ 87 } \ 88 } \ 89 ++buf; \ 90 CHECK_EOF(); \ 91 } \ 92 tok = tok_start; \ 93 toklen = buf - tok_start; \ 94 } while (0) 95 96 static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 97 "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" 98 "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" 99 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" 100 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 101 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 102 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 103 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 104 105 static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) 106 { 107 *found = 0; 108 #if __SSE4_2__ 109 if (likely(buf_end - buf >= 16)) { 110 __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); 111 112 size_t left = (buf_end - buf) & ~15; 113 do { 114 __m128i b16 = _mm_loadu_si128((const __m128i *)buf); 115 int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); 116 if (unlikely(r != 16)) { 117 buf += r; 118 *found = 1; 119 break; 120 } 121 buf += 16; 122 left -= 16; 123 } while (likely(left != 0)); 124 } 125 #else 126 /* suppress unused parameter warning */ 127 (void)buf_end; 128 (void)ranges; 129 (void)ranges_size; 130 #endif 131 return buf; 132 } 133 134 static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) 135 { 136 const char *token_start = buf; 137 138 #ifdef __SSE4_2__ 139 static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */ 140 "\012\037" /* allow SP and up to but not including DEL */ 141 "\177\177"; /* allow chars w. MSB set */ 142 int found; 143 buf = findchar_fast(buf, buf_end, ranges1, 6, &found); 144 if (found) 145 goto FOUND_CTL; 146 #else 147 /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ 148 while (likely(buf_end - buf >= 8)) { 149 #define DOIT() \ 150 do { \ 151 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ 152 goto NonPrintable; \ 153 ++buf; \ 154 } while (0) 155 DOIT(); 156 DOIT(); 157 DOIT(); 158 DOIT(); 159 DOIT(); 160 DOIT(); 161 DOIT(); 162 DOIT(); 163 #undef DOIT 164 continue; 165 NonPrintable: 166 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 167 goto FOUND_CTL; 168 } 169 ++buf; 170 } 171 #endif 172 for (;; ++buf) { 173 CHECK_EOF(); 174 if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { 175 if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 176 goto FOUND_CTL; 177 } 178 } 179 } 180 FOUND_CTL: 181 if (likely(*buf == '\015')) { 182 ++buf; 183 EXPECT_CHAR('\012'); 184 *token_len = buf - 2 - token_start; 185 } else if (*buf == '\012') { 186 *token_len = buf - token_start; 187 ++buf; 188 } else { 189 *ret = -1; 190 return NULL; 191 } 192 *token = token_start; 193 194 return buf; 195 } 196 197 static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) 198 { 199 int ret_cnt = 0; 200 buf = last_len < 3 ? buf : buf + last_len - 3; 201 202 while (1) { 203 CHECK_EOF(); 204 if (*buf == '\015') { 205 ++buf; 206 CHECK_EOF(); 207 EXPECT_CHAR('\012'); 208 ++ret_cnt; 209 } else if (*buf == '\012') { 210 ++buf; 211 ++ret_cnt; 212 } else { 213 ++buf; 214 ret_cnt = 0; 215 } 216 if (ret_cnt == 2) { 217 return buf; 218 } 219 } 220 221 *ret = -2; 222 return NULL; 223 } 224 225 #define PARSE_INT(valp_, mul_) \ 226 if (*buf < '0' || '9' < *buf) { \ 227 buf++; \ 228 *ret = -1; \ 229 return NULL; \ 230 } \ 231 *(valp_) = (mul_) * (*buf++ - '0'); 232 233 #define PARSE_INT_3(valp_) \ 234 do { \ 235 int res_ = 0; \ 236 PARSE_INT(&res_, 100) \ 237 *valp_ = res_; \ 238 PARSE_INT(&res_, 10) \ 239 *valp_ += res_; \ 240 PARSE_INT(&res_, 1) \ 241 *valp_ += res_; \ 242 } while (0) 243 244 /* returned pointer is always within [buf, buf_end), or null */ 245 static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char, 246 int *ret) 247 { 248 /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128 249 * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */ 250 static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */ 251 "\"\"" /* 0x22 */ 252 "()" /* 0x28,0x29 */ 253 ",," /* 0x2c */ 254 "//" /* 0x2f */ 255 ":@" /* 0x3a-0x40 */ 256 "[]" /* 0x5b-0x5d */ 257 "{\xff"; /* 0x7b-0xff */ 258 const char *buf_start = buf; 259 int found; 260 buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found); 261 if (!found) { 262 CHECK_EOF(); 263 } 264 while (1) { 265 if (*buf == next_char) { 266 break; 267 } else if (!token_char_map[(unsigned char)*buf]) { 268 *ret = -1; 269 return NULL; 270 } 271 ++buf; 272 CHECK_EOF(); 273 } 274 *token = buf_start; 275 *token_len = buf - buf_start; 276 return buf; 277 } 278 279 /* returned pointer is always within [buf, buf_end), or null */ 280 static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) 281 { 282 /* we want at least [HTTP/1.<two chars>] to try to parse */ 283 if (buf_end - buf < 9) { 284 *ret = -2; 285 return NULL; 286 } 287 EXPECT_CHAR_NO_CHECK('H'); 288 EXPECT_CHAR_NO_CHECK('T'); 289 EXPECT_CHAR_NO_CHECK('T'); 290 EXPECT_CHAR_NO_CHECK('P'); 291 EXPECT_CHAR_NO_CHECK('/'); 292 EXPECT_CHAR_NO_CHECK('1'); 293 EXPECT_CHAR_NO_CHECK('.'); 294 PARSE_INT(minor_version, 1); 295 return buf; 296 } 297 298 static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, 299 size_t max_headers, int *ret) 300 { 301 for (;; ++*num_headers) { 302 CHECK_EOF(); 303 if (*buf == '\015') { 304 ++buf; 305 EXPECT_CHAR('\012'); 306 break; 307 } else if (*buf == '\012') { 308 ++buf; 309 break; 310 } 311 if (*num_headers == max_headers) { 312 *ret = -1; 313 return NULL; 314 } 315 if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { 316 /* parsing name, but do not discard SP before colon, see 317 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ 318 if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) { 319 return NULL; 320 } 321 if (headers[*num_headers].name_len == 0) { 322 *ret = -1; 323 return NULL; 324 } 325 ++buf; 326 for (;; ++buf) { 327 CHECK_EOF(); 328 if (!(*buf == ' ' || *buf == '\t')) { 329 break; 330 } 331 } 332 } else { 333 headers[*num_headers].name = NULL; 334 headers[*num_headers].name_len = 0; 335 } 336 const char *value; 337 size_t value_len; 338 if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) { 339 return NULL; 340 } 341 /* remove trailing SPs and HTABs */ 342 const char *value_end = value + value_len; 343 for (; value_end != value; --value_end) { 344 const char c = *(value_end - 1); 345 if (!(c == ' ' || c == '\t')) { 346 break; 347 } 348 } 349 headers[*num_headers].value = value; 350 headers[*num_headers].value_len = value_end - value; 351 } 352 return buf; 353 } 354 355 static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, 356 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, 357 size_t max_headers, int *ret) 358 { 359 /* skip first empty line (some clients add CRLF after POST content) */ 360 CHECK_EOF(); 361 if (*buf == '\015') { 362 ++buf; 363 EXPECT_CHAR('\012'); 364 } else if (*buf == '\012') { 365 ++buf; 366 } 367 368 /* parse request line */ 369 if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) { 370 return NULL; 371 } 372 do { 373 ++buf; 374 CHECK_EOF(); 375 } while (*buf == ' '); 376 ADVANCE_TOKEN(*path, *path_len); 377 do { 378 ++buf; 379 CHECK_EOF(); 380 } while (*buf == ' '); 381 if (*method_len == 0 || *path_len == 0) { 382 *ret = -1; 383 return NULL; 384 } 385 if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { 386 return NULL; 387 } 388 if (*buf == '\015') { 389 ++buf; 390 EXPECT_CHAR('\012'); 391 } else if (*buf == '\012') { 392 ++buf; 393 } else { 394 *ret = -1; 395 return NULL; 396 } 397 398 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); 399 } 400 401 int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, 402 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) 403 { 404 const char *buf = buf_start, *buf_end = buf_start + len; 405 size_t max_headers = *num_headers; 406 int r; 407 408 *method = NULL; 409 *method_len = 0; 410 *path = NULL; 411 *path_len = 0; 412 *minor_version = -1; 413 *num_headers = 0; 414 415 /* if last_len != 0, check if the request is complete (a fast countermeasure 416 againt slowloris */ 417 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 418 return r; 419 } 420 421 if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, 422 &r)) == NULL) { 423 return r; 424 } 425 426 return (int)(buf - buf_start); 427 } 428 429 static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, 430 size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) 431 { 432 /* parse "HTTP/1.x" */ 433 if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { 434 return NULL; 435 } 436 /* skip space */ 437 if (*buf != ' ') { 438 *ret = -1; 439 return NULL; 440 } 441 do { 442 ++buf; 443 CHECK_EOF(); 444 } while (*buf == ' '); 445 /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */ 446 if (buf_end - buf < 4) { 447 *ret = -2; 448 return NULL; 449 } 450 PARSE_INT_3(status); 451 452 /* get message including preceding space */ 453 if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { 454 return NULL; 455 } 456 if (*msg_len == 0) { 457 /* ok */ 458 } else if (**msg == ' ') { 459 /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP 460 * before running past the end of the given buffer. */ 461 do { 462 ++*msg; 463 --*msg_len; 464 } while (**msg == ' '); 465 } else { 466 /* garbage found after status code */ 467 *ret = -1; 468 return NULL; 469 } 470 471 return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); 472 } 473 474 int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, 475 struct phr_header *headers, size_t *num_headers, size_t last_len) 476 { 477 const char *buf = buf_start, *buf_end = buf + len; 478 size_t max_headers = *num_headers; 479 int r; 480 481 *minor_version = -1; 482 *status = 0; 483 *msg = NULL; 484 *msg_len = 0; 485 *num_headers = 0; 486 487 /* if last_len != 0, check if the response is complete (a fast countermeasure 488 against slowloris */ 489 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 490 return r; 491 } 492 493 if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { 494 return r; 495 } 496 497 return (int)(buf - buf_start); 498 } 499 500 int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) 501 { 502 const char *buf = buf_start, *buf_end = buf + len; 503 size_t max_headers = *num_headers; 504 int r; 505 506 *num_headers = 0; 507 508 /* if last_len != 0, check if the response is complete (a fast countermeasure 509 against slowloris */ 510 if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 511 return r; 512 } 513 514 if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { 515 return r; 516 } 517 518 return (int)(buf - buf_start); 519 } 520 521 enum { 522 CHUNKED_IN_CHUNK_SIZE, 523 CHUNKED_IN_CHUNK_EXT, 524 CHUNKED_IN_CHUNK_DATA, 525 CHUNKED_IN_CHUNK_CRLF, 526 CHUNKED_IN_TRAILERS_LINE_HEAD, 527 CHUNKED_IN_TRAILERS_LINE_MIDDLE 528 }; 529 530 static int decode_hex(int ch) 531 { 532 if ('0' <= ch && ch <= '9') { 533 return ch - '0'; 534 } else if ('A' <= ch && ch <= 'F') { 535 return ch - 'A' + 0xa; 536 } else if ('a' <= ch && ch <= 'f') { 537 return ch - 'a' + 0xa; 538 } else { 539 return -1; 540 } 541 } 542 543 ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) 544 { 545 size_t dst = 0, src = 0, bufsz = *_bufsz; 546 ssize_t ret = -2; /* incomplete */ 547 548 while (1) { 549 switch (decoder->_state) { 550 case CHUNKED_IN_CHUNK_SIZE: 551 for (;; ++src) { 552 int v; 553 if (src == bufsz) 554 goto Exit; 555 if ((v = decode_hex(buf[src])) == -1) { 556 if (decoder->_hex_count == 0) { 557 ret = -1; 558 goto Exit; 559 } 560 break; 561 } 562 if (decoder->_hex_count == sizeof(size_t) * 2) { 563 ret = -1; 564 goto Exit; 565 } 566 decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; 567 ++decoder->_hex_count; 568 } 569 decoder->_hex_count = 0; 570 decoder->_state = CHUNKED_IN_CHUNK_EXT; 571 /* fallthru */ 572 case CHUNKED_IN_CHUNK_EXT: 573 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ 574 for (;; ++src) { 575 if (src == bufsz) 576 goto Exit; 577 if (buf[src] == '\012') 578 break; 579 } 580 ++src; 581 if (decoder->bytes_left_in_chunk == 0) { 582 if (decoder->consume_trailer) { 583 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 584 break; 585 } else { 586 goto Complete; 587 } 588 } 589 decoder->_state = CHUNKED_IN_CHUNK_DATA; 590 /* fallthru */ 591 case CHUNKED_IN_CHUNK_DATA: { 592 size_t avail = bufsz - src; 593 if (avail < decoder->bytes_left_in_chunk) { 594 if (dst != src) 595 memmove(buf + dst, buf + src, avail); 596 src += avail; 597 dst += avail; 598 decoder->bytes_left_in_chunk -= avail; 599 goto Exit; 600 } 601 if (dst != src) 602 memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); 603 src += decoder->bytes_left_in_chunk; 604 dst += decoder->bytes_left_in_chunk; 605 decoder->bytes_left_in_chunk = 0; 606 decoder->_state = CHUNKED_IN_CHUNK_CRLF; 607 } 608 /* fallthru */ 609 case CHUNKED_IN_CHUNK_CRLF: 610 for (;; ++src) { 611 if (src == bufsz) 612 goto Exit; 613 if (buf[src] != '\015') 614 break; 615 } 616 if (buf[src] != '\012') { 617 ret = -1; 618 goto Exit; 619 } 620 ++src; 621 decoder->_state = CHUNKED_IN_CHUNK_SIZE; 622 break; 623 case CHUNKED_IN_TRAILERS_LINE_HEAD: 624 for (;; ++src) { 625 if (src == bufsz) 626 goto Exit; 627 if (buf[src] != '\015') 628 break; 629 } 630 if (buf[src++] == '\012') 631 goto Complete; 632 decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; 633 /* fallthru */ 634 case CHUNKED_IN_TRAILERS_LINE_MIDDLE: 635 for (;; ++src) { 636 if (src == bufsz) 637 goto Exit; 638 if (buf[src] == '\012') 639 break; 640 } 641 ++src; 642 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 643 break; 644 default: 645 assert(!"decoder is corrupt"); 646 } 647 } 648 649 Complete: 650 ret = bufsz - src; 651 Exit: 652 if (dst != src) 653 memmove(buf + dst, buf + src, bufsz - src); 654 *_bufsz = dst; 655 return ret; 656 } 657 658 int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) 659 { 660 return decoder->_state == CHUNKED_IN_CHUNK_DATA; 661 } 662 663 #undef CHECK_EOF 664 #undef EXPECT_CHAR 665 #undef ADVANCE_TOKEN