github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/net/textproto/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package textproto 6 7 import ( 8 "bufio" 9 "bytes" 10 "fmt" 11 "io" 12 "strconv" 13 "strings" 14 "sync" 15 ) 16 17 // A Reader implements convenience methods for reading requests 18 // or responses from a text protocol network connection. 19 type Reader struct { 20 R *bufio.Reader 21 dot *dotReader 22 buf []byte // a re-usable buffer for readContinuedLineSlice 23 } 24 25 // NewReader returns a new Reader reading from r. 26 // 27 // To avoid denial of service attacks, the provided bufio.Reader 28 // should be reading from an io.LimitReader or similar Reader to bound 29 // the size of responses. 30 func NewReader(r *bufio.Reader) *Reader { 31 return &Reader{R: r} 32 } 33 34 // ReadLine reads a single line from r, 35 // eliding the final \n or \r\n from the returned string. 36 func (r *Reader) ReadLine() (string, error) { 37 line, err := r.readLineSlice() 38 return string(line), err 39 } 40 41 // ReadLineBytes is like ReadLine but returns a []byte instead of a string. 42 func (r *Reader) ReadLineBytes() ([]byte, error) { 43 line, err := r.readLineSlice() 44 if line != nil { 45 line = bytes.Clone(line) 46 } 47 return line, err 48 } 49 50 func (r *Reader) readLineSlice() ([]byte, error) { 51 r.closeDot() 52 var line []byte 53 for { 54 l, more, err := r.R.ReadLine() 55 if err != nil { 56 return nil, err 57 } 58 // Avoid the copy if the first call produced a full line. 59 if line == nil && !more { 60 return l, nil 61 } 62 line = append(line, l...) 63 if !more { 64 break 65 } 66 } 67 return line, nil 68 } 69 70 // ReadContinuedLine reads a possibly continued line from r, 71 // eliding the final trailing ASCII white space. 
72 // Lines after the first are considered continuations if they 73 // begin with a space or tab character. In the returned data, 74 // continuation lines are separated from the previous line 75 // only by a single space: the newline and leading white space 76 // are removed. 77 // 78 // For example, consider this input: 79 // 80 // Line 1 81 // continued... 82 // Line 2 83 // 84 // The first call to ReadContinuedLine will return "Line 1 continued..." 85 // and the second will return "Line 2". 86 // 87 // Empty lines are never continued. 88 func (r *Reader) ReadContinuedLine() (string, error) { 89 line, err := r.readContinuedLineSlice(noValidation) 90 return string(line), err 91 } 92 93 // trim returns s with leading and trailing spaces and tabs removed. 94 // It does not assume Unicode or UTF-8. 95 func trim(s []byte) []byte { 96 i := 0 97 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 98 i++ 99 } 100 n := len(s) 101 for n > i && (s[n-1] == ' ' || s[n-1] == '\t') { 102 n-- 103 } 104 return s[i:n] 105 } 106 107 // ReadContinuedLineBytes is like ReadContinuedLine but 108 // returns a []byte instead of a string. 109 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) { 110 line, err := r.readContinuedLineSlice(noValidation) 111 if line != nil { 112 line = bytes.Clone(line) 113 } 114 return line, err 115 } 116 117 // readContinuedLineSlice reads continued lines from the reader buffer, 118 // returning a byte slice with all lines. The validateFirstLine function 119 // is run on the first read line, and if it returns an error then this 120 // error is returned from readContinuedLineSlice. 121 func (r *Reader) readContinuedLineSlice(validateFirstLine func([]byte) error) ([]byte, error) { 122 if validateFirstLine == nil { 123 return nil, fmt.Errorf("missing validateFirstLine func") 124 } 125 126 // Read the first line. 
127 line, err := r.readLineSlice() 128 if err != nil { 129 return nil, err 130 } 131 if len(line) == 0 { // blank line - no continuation 132 return line, nil 133 } 134 135 if err := validateFirstLine(line); err != nil { 136 return nil, err 137 } 138 139 // Optimistically assume that we have started to buffer the next line 140 // and it starts with an ASCII letter (the next header key), or a blank 141 // line, so we can avoid copying that buffered data around in memory 142 // and skipping over non-existent whitespace. 143 if r.R.Buffered() > 1 { 144 peek, _ := r.R.Peek(2) 145 if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') || 146 len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' { 147 return trim(line), nil 148 } 149 } 150 151 // ReadByte or the next readLineSlice will flush the read buffer; 152 // copy the slice into buf. 153 r.buf = append(r.buf[:0], trim(line)...) 154 155 // Read continuation lines. 156 for r.skipSpace() > 0 { 157 line, err := r.readLineSlice() 158 if err != nil { 159 break 160 } 161 r.buf = append(r.buf, ' ') 162 r.buf = append(r.buf, trim(line)...) 163 } 164 return r.buf, nil 165 } 166 167 // skipSpace skips R over all spaces and returns the number of bytes skipped. 168 func (r *Reader) skipSpace() int { 169 n := 0 170 for { 171 c, err := r.R.ReadByte() 172 if err != nil { 173 // Bufio will keep err until next read. 
174 break 175 } 176 if c != ' ' && c != '\t' { 177 r.R.UnreadByte() 178 break 179 } 180 n++ 181 } 182 return n 183 } 184 185 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) { 186 line, err := r.ReadLine() 187 if err != nil { 188 return 189 } 190 return parseCodeLine(line, expectCode) 191 } 192 193 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) { 194 if len(line) < 4 || line[3] != ' ' && line[3] != '-' { 195 err = ProtocolError("short response: " + line) 196 return 197 } 198 continued = line[3] == '-' 199 code, err = strconv.Atoi(line[0:3]) 200 if err != nil || code < 100 { 201 err = ProtocolError("invalid response code: " + line) 202 return 203 } 204 message = line[4:] 205 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode || 206 10 <= expectCode && expectCode < 100 && code/10 != expectCode || 207 100 <= expectCode && expectCode < 1000 && code != expectCode { 208 err = &Error{code, message} 209 } 210 return 211 } 212 213 // ReadCodeLine reads a response code line of the form 214 // 215 // code message 216 // 217 // where code is a three-digit status code and the message 218 // extends to the rest of the line. An example of such a line is: 219 // 220 // 220 plan9.bell-labs.com ESMTP 221 // 222 // If the prefix of the status does not match the digits in expectCode, 223 // ReadCodeLine returns with err set to &Error{code, message}. 224 // For example, if expectCode is 31, an error will be returned if 225 // the status is not in the range [310,319]. 226 // 227 // If the response is multi-line, ReadCodeLine returns an error. 228 // 229 // An expectCode <= 0 disables the check of the status code. 
230 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) { 231 code, continued, message, err := r.readCodeLine(expectCode) 232 if err == nil && continued { 233 err = ProtocolError("unexpected multi-line response: " + message) 234 } 235 return 236 } 237 238 // ReadResponse reads a multi-line response of the form: 239 // 240 // code-message line 1 241 // code-message line 2 242 // ... 243 // code message line n 244 // 245 // where code is a three-digit status code. The first line starts with the 246 // code and a hyphen. The response is terminated by a line that starts 247 // with the same code followed by a space. Each line in message is 248 // separated by a newline (\n). 249 // 250 // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for 251 // details of another form of response accepted: 252 // 253 // code-message line 1 254 // message line 2 255 // ... 256 // code message line n 257 // 258 // If the prefix of the status does not match the digits in expectCode, 259 // ReadResponse returns with err set to &Error{code, message}. 260 // For example, if expectCode is 31, an error will be returned if 261 // the status is not in the range [310,319]. 262 // 263 // An expectCode <= 0 disables the check of the status code. 
264 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) { 265 code, continued, message, err := r.readCodeLine(expectCode) 266 multi := continued 267 for continued { 268 line, err := r.ReadLine() 269 if err != nil { 270 return 0, "", err 271 } 272 273 var code2 int 274 var moreMessage string 275 code2, continued, moreMessage, err = parseCodeLine(line, 0) 276 if err != nil || code2 != code { 277 message += "\n" + strings.TrimRight(line, "\r\n") 278 continued = true 279 continue 280 } 281 message += "\n" + moreMessage 282 } 283 if err != nil && multi && message != "" { 284 // replace one line error message with all lines (full message) 285 err = &Error{code, message} 286 } 287 return 288 } 289 290 // DotReader returns a new Reader that satisfies Reads using the 291 // decoded text of a dot-encoded block read from r. 292 // The returned Reader is only valid until the next call 293 // to a method on r. 294 // 295 // Dot encoding is a common framing used for data blocks 296 // in text protocols such as SMTP. The data consists of a sequence 297 // of lines, each of which ends in "\r\n". The sequence itself 298 // ends at a line containing just a dot: ".\r\n". Lines beginning 299 // with a dot are escaped with an additional dot to avoid 300 // looking like the end of the sequence. 301 // 302 // The decoded form returned by the Reader's Read method 303 // rewrites the "\r\n" line endings into the simpler "\n", 304 // removes leading dot escapes if present, and stops with error io.EOF 305 // after consuming (and discarding) the end-of-sequence line. 306 func (r *Reader) DotReader() io.Reader { 307 r.closeDot() 308 r.dot = &dotReader{r: r} 309 return r.dot 310 } 311 312 type dotReader struct { 313 r *Reader 314 state int 315 } 316 317 // Read satisfies reads by decoding dot-encoded data read from d.r. 
318 func (d *dotReader) Read(b []byte) (n int, err error) { 319 // Run data through a simple state machine to 320 // elide leading dots, rewrite trailing \r\n into \n, 321 // and detect ending .\r\n line. 322 const ( 323 stateBeginLine = iota // beginning of line; initial state; must be zero 324 stateDot // read . at beginning of line 325 stateDotCR // read .\r at beginning of line 326 stateCR // read \r (possibly at end of line) 327 stateData // reading data in middle of line 328 stateEOF // reached .\r\n end marker line 329 ) 330 br := d.r.R 331 for n < len(b) && d.state != stateEOF { 332 var c byte 333 c, err = br.ReadByte() 334 if err != nil { 335 if err == io.EOF { 336 err = io.ErrUnexpectedEOF 337 } 338 break 339 } 340 switch d.state { 341 case stateBeginLine: 342 if c == '.' { 343 d.state = stateDot 344 continue 345 } 346 if c == '\r' { 347 d.state = stateCR 348 continue 349 } 350 d.state = stateData 351 352 case stateDot: 353 if c == '\r' { 354 d.state = stateDotCR 355 continue 356 } 357 if c == '\n' { 358 d.state = stateEOF 359 continue 360 } 361 d.state = stateData 362 363 case stateDotCR: 364 if c == '\n' { 365 d.state = stateEOF 366 continue 367 } 368 // Not part of .\r\n. 369 // Consume leading dot and emit saved \r. 370 br.UnreadByte() 371 c = '\r' 372 d.state = stateData 373 374 case stateCR: 375 if c == '\n' { 376 d.state = stateBeginLine 377 break 378 } 379 // Not part of \r\n. Emit saved \r 380 br.UnreadByte() 381 c = '\r' 382 d.state = stateData 383 384 case stateData: 385 if c == '\r' { 386 d.state = stateCR 387 continue 388 } 389 if c == '\n' { 390 d.state = stateBeginLine 391 } 392 } 393 b[n] = c 394 n++ 395 } 396 if err == nil && d.state == stateEOF { 397 err = io.EOF 398 } 399 if err != nil && d.r.dot == d { 400 d.r.dot = nil 401 } 402 return 403 } 404 405 // closeDot drains the current DotReader if any, 406 // making sure that it reads until the ending dot line. 
407 func (r *Reader) closeDot() { 408 if r.dot == nil { 409 return 410 } 411 buf := make([]byte, 128) 412 for r.dot != nil { 413 // When Read reaches EOF or an error, 414 // it will set r.dot == nil. 415 r.dot.Read(buf) 416 } 417 } 418 419 // ReadDotBytes reads a dot-encoding and returns the decoded data. 420 // 421 // See the documentation for the DotReader method for details about dot-encoding. 422 func (r *Reader) ReadDotBytes() ([]byte, error) { 423 return io.ReadAll(r.DotReader()) 424 } 425 426 // ReadDotLines reads a dot-encoding and returns a slice 427 // containing the decoded lines, with the final \r\n or \n elided from each. 428 // 429 // See the documentation for the DotReader method for details about dot-encoding. 430 func (r *Reader) ReadDotLines() ([]string, error) { 431 // We could use ReadDotBytes and then Split it, 432 // but reading a line at a time avoids needing a 433 // large contiguous block of memory and is simpler. 434 var v []string 435 var err error 436 for { 437 var line string 438 line, err = r.ReadLine() 439 if err != nil { 440 if err == io.EOF { 441 err = io.ErrUnexpectedEOF 442 } 443 break 444 } 445 446 // Dot by itself marks end; otherwise cut one dot. 447 if len(line) > 0 && line[0] == '.' { 448 if len(line) == 1 { 449 break 450 } 451 line = line[1:] 452 } 453 v = append(v, line) 454 } 455 return v, err 456 } 457 458 var colon = []byte(":") 459 460 // ReadMIMEHeader reads a MIME-style header from r. 461 // The header is a sequence of possibly continued Key: Value lines 462 // ending in a blank line. 463 // The returned map m maps CanonicalMIMEHeaderKey(key) to a 464 // sequence of values in the same order encountered in the input. 
465 // 466 // For example, consider this input: 467 // 468 // My-Key: Value 1 469 // Long-Key: Even 470 // Longer Value 471 // My-Key: Value 2 472 // 473 // Given that input, ReadMIMEHeader returns the map: 474 // 475 // map[string][]string{ 476 // "My-Key": {"Value 1", "Value 2"}, 477 // "Long-Key": {"Even Longer Value"}, 478 // } 479 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { 480 // Avoid lots of small slice allocations later by allocating one 481 // large one ahead of time which we'll cut up into smaller 482 // slices. If this isn't big enough later, we allocate small ones. 483 var strs []string 484 hint := r.upcomingHeaderNewlines() 485 if hint > 0 { 486 strs = make([]string, hint) 487 } 488 489 m := make(MIMEHeader, hint) 490 491 // The first line cannot start with a leading space. 492 if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') { 493 line, err := r.readLineSlice() 494 if err != nil { 495 return m, err 496 } 497 return m, ProtocolError("malformed MIME header initial line: " + string(line)) 498 } 499 500 for { 501 kv, err := r.readContinuedLineSlice(mustHaveFieldNameColon) 502 if len(kv) == 0 { 503 return m, err 504 } 505 506 // Key ends at first colon. 507 k, v, ok := bytes.Cut(kv, colon) 508 if !ok { 509 return m, ProtocolError("malformed MIME header line: " + string(kv)) 510 } 511 key := canonicalMIMEHeaderKey(k) 512 513 // As per RFC 7230 field-name is a token, tokens consist of one or more chars. 514 // We could return a ProtocolError here, but better to be liberal in what we 515 // accept, so if we get an empty key, skip it. 516 if key == "" { 517 continue 518 } 519 520 // Skip initial spaces in value. 521 value := strings.TrimLeft(string(v), " \t") 522 523 vv := m[key] 524 if vv == nil && len(strs) > 0 { 525 // More than likely this will be a single-element key. 526 // Most headers aren't multi-valued. 
527 // Set the capacity on strs[0] to 1, so any future append 528 // won't extend the slice into the other strings. 529 vv, strs = strs[:1:1], strs[1:] 530 vv[0] = value 531 m[key] = vv 532 } else { 533 m[key] = append(vv, value) 534 } 535 536 if err != nil { 537 return m, err 538 } 539 } 540 } 541 542 // noValidation is a no-op validation func for readContinuedLineSlice 543 // that permits any lines. 544 func noValidation(_ []byte) error { return nil } 545 546 // mustHaveFieldNameColon ensures that, per RFC 7230, the 547 // field-name is on a single line, so the first line must 548 // contain a colon. 549 func mustHaveFieldNameColon(line []byte) error { 550 if bytes.IndexByte(line, ':') < 0 { 551 return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line)) 552 } 553 return nil 554 } 555 556 var nl = []byte("\n") 557 558 // upcomingHeaderNewlines returns an approximation of the number of newlines 559 // that will be in this header. If it gets confused, it returns 0. 560 func (r *Reader) upcomingHeaderNewlines() (n int) { 561 // Try to determine the 'hint' size. 562 r.R.Peek(1) // force a buffer load if empty 563 s := r.R.Buffered() 564 if s == 0 { 565 return 566 } 567 peek, _ := r.R.Peek(s) 568 return bytes.Count(peek, nl) 569 } 570 571 // CanonicalMIMEHeaderKey returns the canonical format of the 572 // MIME header key s. The canonicalization converts the first 573 // letter and any letter following a hyphen to upper case; 574 // the rest are converted to lowercase. For example, the 575 // canonical key for "accept-encoding" is "Accept-Encoding". 576 // MIME header keys are assumed to be ASCII only. 577 // If s contains a space or invalid header field bytes, it is 578 // returned without modifications. 579 func CanonicalMIMEHeaderKey(s string) string { 580 // Quick check for canonical encoding. 
581 upper := true 582 for i := 0; i < len(s); i++ { 583 c := s[i] 584 if !validHeaderFieldByte(c) { 585 return s 586 } 587 if upper && 'a' <= c && c <= 'z' { 588 return canonicalMIMEHeaderKey([]byte(s)) 589 } 590 if !upper && 'A' <= c && c <= 'Z' { 591 return canonicalMIMEHeaderKey([]byte(s)) 592 } 593 upper = c == '-' 594 } 595 return s 596 } 597 598 const toLower = 'a' - 'A' 599 600 // validHeaderFieldByte reports whether b is a valid byte in a header 601 // field name. RFC 7230 says: 602 // 603 // header-field = field-name ":" OWS field-value OWS 604 // field-name = token 605 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 606 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 607 // token = 1*tchar 608 func validHeaderFieldByte(b byte) bool { 609 return int(b) < len(isTokenTable) && isTokenTable[b] 610 } 611 612 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is 613 // allowed to mutate the provided byte slice before returning the 614 // string. 615 // 616 // For invalid inputs (if a contains spaces or non-token bytes), a 617 // is unchanged and a string copy is returned. 618 func canonicalMIMEHeaderKey(a []byte) string { 619 // See if a looks like a header key. If not, return it unchanged. 620 for _, c := range a { 621 if validHeaderFieldByte(c) { 622 continue 623 } 624 // Don't canonicalize. 625 return string(a) 626 } 627 628 upper := true 629 for i, c := range a { 630 // Canonicalize: first letter upper case 631 // and upper case after each dash. 632 // (Host, User-Agent, If-Modified-Since). 633 // MIME headers are ASCII only, so no Unicode issues. 
634 if upper && 'a' <= c && c <= 'z' { 635 c -= toLower 636 } else if !upper && 'A' <= c && c <= 'Z' { 637 c += toLower 638 } 639 a[i] = c 640 upper = c == '-' // for next time 641 } 642 commonHeaderOnce.Do(initCommonHeader) 643 // The compiler recognizes m[string(byteSlice)] as a special 644 // case, so a copy of a's bytes into a new string does not 645 // happen in this map lookup: 646 if v := commonHeader[string(a)]; v != "" { 647 return v 648 } 649 return string(a) 650 } 651 652 // commonHeader interns common header strings. 653 var commonHeader map[string]string 654 655 var commonHeaderOnce sync.Once 656 657 func initCommonHeader() { 658 commonHeader = make(map[string]string) 659 for _, v := range []string{ 660 "Accept", 661 "Accept-Charset", 662 "Accept-Encoding", 663 "Accept-Language", 664 "Accept-Ranges", 665 "Cache-Control", 666 "Cc", 667 "Connection", 668 "Content-Id", 669 "Content-Language", 670 "Content-Length", 671 "Content-Transfer-Encoding", 672 "Content-Type", 673 "Cookie", 674 "Date", 675 "Dkim-Signature", 676 "Etag", 677 "Expires", 678 "From", 679 "Host", 680 "If-Modified-Since", 681 "If-None-Match", 682 "In-Reply-To", 683 "Last-Modified", 684 "Location", 685 "Message-Id", 686 "Mime-Version", 687 "Pragma", 688 "Received", 689 "Return-Path", 690 "Server", 691 "Set-Cookie", 692 "Subject", 693 "To", 694 "User-Agent", 695 "Via", 696 "X-Forwarded-For", 697 "X-Imforwards", 698 "X-Powered-By", 699 } { 700 commonHeader[v] = v 701 } 702 } 703 704 // isTokenTable is a copy of net/http/lex.go's isTokenTable. 
// See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
//
// The table is indexed by byte value; an entry is true when that byte is an
// RFC 7230 tchar. Entries are listed in ascending ASCII order. Bytes at or
// above len(isTokenTable) are not tokens and must be range-checked by callers.
var isTokenTable = [127]bool{
	// Punctuation below the digits.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true,
	'*': true, '+': true, '-': true, '.': true,

	// DIGIT.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// Upper-case ALPHA.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// Punctuation between the letter ranges.
	'^': true, '_': true, '`': true,

	// Lower-case ALPHA.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// Punctuation above the letters.
	'|': true, '~': true,
}