github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/net/textproto/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package textproto 6 7 import ( 8 "bufio" 9 "bytes" 10 "io" 11 "io/ioutil" 12 "strconv" 13 "strings" 14 "sync" 15 ) 16 17 // A Reader implements convenience methods for reading requests 18 // or responses from a text protocol network connection. 19 type Reader struct { 20 R *bufio.Reader 21 dot *dotReader 22 buf []byte // a re-usable buffer for readContinuedLineSlice 23 } 24 25 // NewReader returns a new Reader reading from r. 26 // 27 // To avoid denial of service attacks, the provided bufio.Reader 28 // should be reading from an io.LimitReader or similar Reader to bound 29 // the size of responses. 30 func NewReader(r *bufio.Reader) *Reader { 31 commonHeaderOnce.Do(initCommonHeader) 32 return &Reader{R: r} 33 } 34 35 // ReadLine reads a single line from r, 36 // eliding the final \n or \r\n from the returned string. 37 func (r *Reader) ReadLine() (string, error) { 38 line, err := r.readLineSlice() 39 return string(line), err 40 } 41 42 // ReadLineBytes is like ReadLine but returns a []byte instead of a string. 43 func (r *Reader) ReadLineBytes() ([]byte, error) { 44 line, err := r.readLineSlice() 45 if line != nil { 46 buf := make([]byte, len(line)) 47 copy(buf, line) 48 line = buf 49 } 50 return line, err 51 } 52 53 func (r *Reader) readLineSlice() ([]byte, error) { 54 r.closeDot() 55 var line []byte 56 for { 57 l, more, err := r.R.ReadLine() 58 if err != nil { 59 return nil, err 60 } 61 // Avoid the copy if the first call produced a full line. 62 if line == nil && !more { 63 return l, nil 64 } 65 line = append(line, l...) 66 if !more { 67 break 68 } 69 } 70 return line, nil 71 } 72 73 // ReadContinuedLine reads a possibly continued line from r, 74 // eliding the final trailing ASCII white space. 75 // Lines after the first are considered continuations if they 76 // begin with a space or tab character. In the returned data, 77 // continuation lines are separated from the previous line 78 // only by a single space: the newline and leading white space 79 // are removed. 80 // 81 // For example, consider this input: 82 // 83 // Line 1 84 // continued... 85 // Line 2 86 // 87 // The first call to ReadContinuedLine will return "Line 1 continued..." 88 // and the second will return "Line 2". 89 // 90 // A line consisting of only white space is never continued. 91 // 92 func (r *Reader) ReadContinuedLine() (string, error) { 93 line, err := r.readContinuedLineSlice() 94 return string(line), err 95 } 96 97 // trim returns s with leading and trailing spaces and tabs removed. 98 // It does not assume Unicode or UTF-8. 99 func trim(s []byte) []byte { 100 i := 0 101 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 102 i++ 103 } 104 n := len(s) 105 for n > i && (s[n-1] == ' ' || s[n-1] == '\t') { 106 n-- 107 } 108 return s[i:n] 109 } 110 111 // ReadContinuedLineBytes is like ReadContinuedLine but 112 // returns a []byte instead of a string. 113 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) { 114 line, err := r.readContinuedLineSlice() 115 if line != nil { 116 buf := make([]byte, len(line)) 117 copy(buf, line) 118 line = buf 119 } 120 return line, err 121 } 122 123 func (r *Reader) readContinuedLineSlice() ([]byte, error) { 124 // Read the first line. 125 line, err := r.readLineSlice() 126 if err != nil { 127 return nil, err 128 } 129 if len(line) == 0 { // blank line - no continuation 130 return line, nil 131 } 132 133 // Optimistically assume that we have started to buffer the next line 134 // and it starts with an ASCII letter (the next header key), or a blank 135 // line, so we can avoid copying that buffered data around in memory 136 // and skipping over non-existent whitespace. 137 if r.R.Buffered() > 1 { 138 peek, _ := r.R.Peek(2) 139 if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') || 140 len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' { 141 return trim(line), nil 142 } 143 } 144 145 // ReadByte or the next readLineSlice will flush the read buffer; 146 // copy the slice into buf. 147 r.buf = append(r.buf[:0], trim(line)...) 148 149 // Read continuation lines. 150 for r.skipSpace() > 0 { 151 line, err := r.readLineSlice() 152 if err != nil { 153 break 154 } 155 r.buf = append(r.buf, ' ') 156 r.buf = append(r.buf, trim(line)...) 157 } 158 return r.buf, nil 159 } 160 161 // skipSpace skips R over all spaces and returns the number of bytes skipped. 162 func (r *Reader) skipSpace() int { 163 n := 0 164 for { 165 c, err := r.R.ReadByte() 166 if err != nil { 167 // Bufio will keep err until next read. 168 break 169 } 170 if c != ' ' && c != '\t' { 171 r.R.UnreadByte() 172 break 173 } 174 n++ 175 } 176 return n 177 } 178 179 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) { 180 line, err := r.ReadLine() 181 if err != nil { 182 return 183 } 184 return parseCodeLine(line, expectCode) 185 } 186 187 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) { 188 if len(line) < 4 || line[3] != ' ' && line[3] != '-' { 189 err = ProtocolError("short response: " + line) 190 return 191 } 192 continued = line[3] == '-' 193 code, err = strconv.Atoi(line[0:3]) 194 if err != nil || code < 100 { 195 err = ProtocolError("invalid response code: " + line) 196 return 197 } 198 message = line[4:] 199 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode || 200 10 <= expectCode && expectCode < 100 && code/10 != expectCode || 201 100 <= expectCode && expectCode < 1000 && code != expectCode { 202 err = &Error{code, message} 203 } 204 return 205 } 206 207 // ReadCodeLine reads a response code line of the form 208 // code message 209 // where code is a three-digit status code and the message 210 // extends to the rest of the line. An example of such a line is: 211 // 220 plan9.bell-labs.com ESMTP 212 // 213 // If the prefix of the status does not match the digits in expectCode, 214 // ReadCodeLine returns with err set to &Error{code, message}. 215 // For example, if expectCode is 31, an error will be returned if 216 // the status is not in the range [310,319]. 217 // 218 // If the response is multi-line, ReadCodeLine returns an error. 219 // 220 // An expectCode <= 0 disables the check of the status code. 221 // 222 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) { 223 code, continued, message, err := r.readCodeLine(expectCode) 224 if err == nil && continued { 225 err = ProtocolError("unexpected multi-line response: " + message) 226 } 227 return 228 } 229 230 // ReadResponse reads a multi-line response of the form: 231 // 232 // code-message line 1 233 // code-message line 2 234 // ... 235 // code message line n 236 // 237 // where code is a three-digit status code. The first line starts with the 238 // code and a hyphen. The response is terminated by a line that starts 239 // with the same code followed by a space. Each line in message is 240 // separated by a newline (\n). 241 // 242 // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for 243 // details of another form of response accepted: 244 // 245 // code-message line 1 246 // message line 2 247 // ... 248 // code message line n 249 // 250 // If the prefix of the status does not match the digits in expectCode, 251 // ReadResponse returns with err set to &Error{code, message}. 252 // For example, if expectCode is 31, an error will be returned if 253 // the status is not in the range [310,319]. 254 // 255 // An expectCode <= 0 disables the check of the status code. 256 // 257 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) { 258 code, continued, message, err := r.readCodeLine(expectCode) 259 multi := continued 260 for continued { 261 line, err := r.ReadLine() 262 if err != nil { 263 return 0, "", err 264 } 265 266 var code2 int 267 var moreMessage string 268 code2, continued, moreMessage, err = parseCodeLine(line, 0) 269 if err != nil || code2 != code { 270 message += "\n" + strings.TrimRight(line, "\r\n") 271 continued = true 272 continue 273 } 274 message += "\n" + moreMessage 275 } 276 if err != nil && multi && message != "" { 277 // replace one line error message with all lines (full message) 278 err = &Error{code, message} 279 } 280 return 281 } 282 283 // DotReader returns a new Reader that satisfies Reads using the 284 // decoded text of a dot-encoded block read from r. 285 // The returned Reader is only valid until the next call 286 // to a method on r. 287 // 288 // Dot encoding is a common framing used for data blocks 289 // in text protocols such as SMTP. The data consists of a sequence 290 // of lines, each of which ends in "\r\n". The sequence itself 291 // ends at a line containing just a dot: ".\r\n". Lines beginning 292 // with a dot are escaped with an additional dot to avoid 293 // looking like the end of the sequence. 294 // 295 // The decoded form returned by the Reader's Read method 296 // rewrites the "\r\n" line endings into the simpler "\n", 297 // removes leading dot escapes if present, and stops with error io.EOF 298 // after consuming (and discarding) the end-of-sequence line. 299 func (r *Reader) DotReader() io.Reader { 300 r.closeDot() 301 r.dot = &dotReader{r: r} 302 return r.dot 303 } 304 305 type dotReader struct { 306 r *Reader 307 state int 308 } 309 310 // Read satisfies reads by decoding dot-encoded data read from d.r. 311 func (d *dotReader) Read(b []byte) (n int, err error) { 312 // Run data through a simple state machine to 313 // elide leading dots, rewrite trailing \r\n into \n, 314 // and detect ending .\r\n line. 315 const ( 316 stateBeginLine = iota // beginning of line; initial state; must be zero 317 stateDot // read . at beginning of line 318 stateDotCR // read .\r at beginning of line 319 stateCR // read \r (possibly at end of line) 320 stateData // reading data in middle of line 321 stateEOF // reached .\r\n end marker line 322 ) 323 br := d.r.R 324 for n < len(b) && d.state != stateEOF { 325 var c byte 326 c, err = br.ReadByte() 327 if err != nil { 328 if err == io.EOF { 329 err = io.ErrUnexpectedEOF 330 } 331 break 332 } 333 switch d.state { 334 case stateBeginLine: 335 if c == '.' { 336 d.state = stateDot 337 continue 338 } 339 if c == '\r' { 340 d.state = stateCR 341 continue 342 } 343 d.state = stateData 344 345 case stateDot: 346 if c == '\r' { 347 d.state = stateDotCR 348 continue 349 } 350 if c == '\n' { 351 d.state = stateEOF 352 continue 353 } 354 d.state = stateData 355 356 case stateDotCR: 357 if c == '\n' { 358 d.state = stateEOF 359 continue 360 } 361 // Not part of .\r\n. 362 // Consume leading dot and emit saved \r. 363 br.UnreadByte() 364 c = '\r' 365 d.state = stateData 366 367 case stateCR: 368 if c == '\n' { 369 d.state = stateBeginLine 370 break 371 } 372 // Not part of \r\n. Emit saved \r 373 br.UnreadByte() 374 c = '\r' 375 d.state = stateData 376 377 case stateData: 378 if c == '\r' { 379 d.state = stateCR 380 continue 381 } 382 if c == '\n' { 383 d.state = stateBeginLine 384 } 385 } 386 b[n] = c 387 n++ 388 } 389 if err == nil && d.state == stateEOF { 390 err = io.EOF 391 } 392 if err != nil && d.r.dot == d { 393 d.r.dot = nil 394 } 395 return 396 } 397 398 // closeDot drains the current DotReader if any, 399 // making sure that it reads until the ending dot line. 400 func (r *Reader) closeDot() { 401 if r.dot == nil { 402 return 403 } 404 buf := make([]byte, 128) 405 for r.dot != nil { 406 // When Read reaches EOF or an error, 407 // it will set r.dot == nil. 408 r.dot.Read(buf) 409 } 410 } 411 412 // ReadDotBytes reads a dot-encoding and returns the decoded data. 413 // 414 // See the documentation for the DotReader method for details about dot-encoding. 415 func (r *Reader) ReadDotBytes() ([]byte, error) { 416 return ioutil.ReadAll(r.DotReader()) 417 } 418 419 // ReadDotLines reads a dot-encoding and returns a slice 420 // containing the decoded lines, with the final \r\n or \n elided from each. 421 // 422 // See the documentation for the DotReader method for details about dot-encoding. 423 func (r *Reader) ReadDotLines() ([]string, error) { 424 // We could use ReadDotBytes and then Split it, 425 // but reading a line at a time avoids needing a 426 // large contiguous block of memory and is simpler. 427 var v []string 428 var err error 429 for { 430 var line string 431 line, err = r.ReadLine() 432 if err != nil { 433 if err == io.EOF { 434 err = io.ErrUnexpectedEOF 435 } 436 break 437 } 438 439 // Dot by itself marks end; otherwise cut one dot. 440 if len(line) > 0 && line[0] == '.' { 441 if len(line) == 1 { 442 break 443 } 444 line = line[1:] 445 } 446 v = append(v, line) 447 } 448 return v, err 449 } 450 451 // ReadMIMEHeader reads a MIME-style header from r. 452 // The header is a sequence of possibly continued Key: Value lines 453 // ending in a blank line. 454 // The returned map m maps CanonicalMIMEHeaderKey(key) to a 455 // sequence of values in the same order encountered in the input. 456 // 457 // For example, consider this input: 458 // 459 // My-Key: Value 1 460 // Long-Key: Even 461 // Longer Value 462 // My-Key: Value 2 463 // 464 // Given that input, ReadMIMEHeader returns the map: 465 // 466 // map[string][]string{ 467 // "My-Key": {"Value 1", "Value 2"}, 468 // "Long-Key": {"Even Longer Value"}, 469 // } 470 // 471 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { 472 // Avoid lots of small slice allocations later by allocating one 473 // large one ahead of time which we'll cut up into smaller 474 // slices. If this isn't big enough later, we allocate small ones. 475 var strs []string 476 hint := r.upcomingHeaderNewlines() 477 if hint > 0 { 478 strs = make([]string, hint) 479 } 480 481 m := make(MIMEHeader, hint) 482 483 // The first line cannot start with a leading space. 484 if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') { 485 line, err := r.readLineSlice() 486 if err != nil { 487 return m, err 488 } 489 return m, ProtocolError("malformed MIME header initial line: " + string(line)) 490 } 491 492 for { 493 kv, err := r.readContinuedLineSlice() 494 if len(kv) == 0 { 495 return m, err 496 } 497 498 // Key ends at first colon. 499 i := bytes.IndexByte(kv, ':') 500 if i < 0 { 501 return m, ProtocolError("malformed MIME header line: " + string(kv)) 502 } 503 key := canonicalMIMEHeaderKey(kv[:i]) 504 505 // As per RFC 7230 field-name is a token, tokens consist of one or more chars. 506 // We could return a ProtocolError here, but better to be liberal in what we 507 // accept, so if we get an empty key, skip it. 508 if key == "" { 509 continue 510 } 511 512 // Skip initial spaces in value. 513 i++ // skip colon 514 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') { 515 i++ 516 } 517 value := string(kv[i:]) 518 519 vv := m[key] 520 if vv == nil && len(strs) > 0 { 521 // More than likely this will be a single-element key. 522 // Most headers aren't multi-valued. 523 // Set the capacity on strs[0] to 1, so any future append 524 // won't extend the slice into the other strings. 525 vv, strs = strs[:1:1], strs[1:] 526 vv[0] = value 527 m[key] = vv 528 } else { 529 m[key] = append(vv, value) 530 } 531 532 if err != nil { 533 return m, err 534 } 535 } 536 } 537 538 // upcomingHeaderNewlines returns an approximation of the number of newlines 539 // that will be in this header. If it gets confused, it returns 0. 540 func (r *Reader) upcomingHeaderNewlines() (n int) { 541 // Try to determine the 'hint' size. 542 r.R.Peek(1) // force a buffer load if empty 543 s := r.R.Buffered() 544 if s == 0 { 545 return 546 } 547 peek, _ := r.R.Peek(s) 548 for len(peek) > 0 { 549 i := bytes.IndexByte(peek, '\n') 550 if i < 3 { 551 // Not present (-1) or found within the next few bytes, 552 // implying we're at the end ("\r\n\r\n" or "\n\n") 553 return 554 } 555 n++ 556 peek = peek[i+1:] 557 } 558 return 559 } 560 561 // CanonicalMIMEHeaderKey returns the canonical format of the 562 // MIME header key s. The canonicalization converts the first 563 // letter and any letter following a hyphen to upper case; 564 // the rest are converted to lowercase. For example, the 565 // canonical key for "accept-encoding" is "Accept-Encoding". 566 // MIME header keys are assumed to be ASCII only. 567 // If s contains a space or invalid header field bytes, it is 568 // returned without modifications. 569 func CanonicalMIMEHeaderKey(s string) string { 570 commonHeaderOnce.Do(initCommonHeader) 571 572 // Quick check for canonical encoding. 573 upper := true 574 for i := 0; i < len(s); i++ { 575 c := s[i] 576 if !validHeaderFieldByte(c) { 577 return s 578 } 579 if upper && 'a' <= c && c <= 'z' { 580 return canonicalMIMEHeaderKey([]byte(s)) 581 } 582 if !upper && 'A' <= c && c <= 'Z' { 583 return canonicalMIMEHeaderKey([]byte(s)) 584 } 585 upper = c == '-' 586 } 587 return s 588 } 589 590 const toLower = 'a' - 'A' 591 592 // validHeaderFieldByte reports whether b is a valid byte in a header 593 // field name. RFC 7230 says: 594 // header-field = field-name ":" OWS field-value OWS 595 // field-name = token 596 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 597 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 598 // token = 1*tchar 599 func validHeaderFieldByte(b byte) bool { 600 return int(b) < len(isTokenTable) && isTokenTable[b] 601 } 602 603 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is 604 // allowed to mutate the provided byte slice before returning the 605 // string. 606 // 607 // For invalid inputs (if a contains spaces or non-token bytes), a 608 // is unchanged and a string copy is returned. 609 func canonicalMIMEHeaderKey(a []byte) string { 610 // See if a looks like a header key. If not, return it unchanged. 611 for _, c := range a { 612 if validHeaderFieldByte(c) { 613 continue 614 } 615 // Don't canonicalize. 616 return string(a) 617 } 618 619 upper := true 620 for i, c := range a { 621 // Canonicalize: first letter upper case 622 // and upper case after each dash. 623 // (Host, User-Agent, If-Modified-Since). 624 // MIME headers are ASCII only, so no Unicode issues. 625 if upper && 'a' <= c && c <= 'z' { 626 c -= toLower 627 } else if !upper && 'A' <= c && c <= 'Z' { 628 c += toLower 629 } 630 a[i] = c 631 upper = c == '-' // for next time 632 } 633 // The compiler recognizes m[string(byteSlice)] as a special 634 // case, so a copy of a's bytes into a new string does not 635 // happen in this map lookup: 636 if v := commonHeader[string(a)]; v != "" { 637 return v 638 } 639 return string(a) 640 } 641 642 // commonHeader interns common header strings. 643 var commonHeader map[string]string 644 645 var commonHeaderOnce sync.Once 646 647 func initCommonHeader() { 648 commonHeader = make(map[string]string) 649 for _, v := range []string{ 650 "Accept", 651 "Accept-Charset", 652 "Accept-Encoding", 653 "Accept-Language", 654 "Accept-Ranges", 655 "Cache-Control", 656 "Cc", 657 "Connection", 658 "Content-Id", 659 "Content-Language", 660 "Content-Length", 661 "Content-Transfer-Encoding", 662 "Content-Type", 663 "Cookie", 664 "Date", 665 "Dkim-Signature", 666 "Etag", 667 "Expires", 668 "From", 669 "Host", 670 "If-Modified-Since", 671 "If-None-Match", 672 "In-Reply-To", 673 "Last-Modified", 674 "Location", 675 "Message-Id", 676 "Mime-Version", 677 "Pragma", 678 "Received", 679 "Return-Path", 680 "Server", 681 "Set-Cookie", 682 "Subject", 683 "To", 684 "User-Agent", 685 "Via", 686 "X-Forwarded-For", 687 "X-Imforwards", 688 "X-Powered-By", 689 } { 690 commonHeader[v] = v 691 } 692 } 693 694 // isTokenTable is a copy of net/http/lex.go's isTokenTable. 695 // See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators 696 var isTokenTable = [127]bool{ 697 '!': true, 698 '#': true, 699 '$': true, 700 '%': true, 701 '&': true, 702 '\'': true, 703 '*': true, 704 '+': true, 705 '-': true, 706 '.': true, 707 '0': true, 708 '1': true, 709 '2': true, 710 '3': true, 711 '4': true, 712 '5': true, 713 '6': true, 714 '7': true, 715 '8': true, 716 '9': true, 717 'A': true, 718 'B': true, 719 'C': true, 720 'D': true, 721 'E': true, 722 'F': true, 723 'G': true, 724 'H': true, 725 'I': true, 726 'J': true, 727 'K': true, 728 'L': true, 729 'M': true, 730 'N': true, 731 'O': true, 732 'P': true, 733 'Q': true, 734 'R': true, 735 'S': true, 736 'T': true, 737 'U': true, 738 'W': true, 739 'V': true, 740 'X': true, 741 'Y': true, 742 'Z': true, 743 '^': true, 744 '_': true, 745 '`': true, 746 'a': true, 747 'b': true, 748 'c': true, 749 'd': true, 750 'e': true, 751 'f': true, 752 'g': true, 753 'h': true, 754 'i': true, 755 'j': true, 756 'k': true, 757 'l': true, 758 'm': true, 759 'n': true, 760 'o': true, 761 'p': true, 762 'q': true, 763 'r': true, 764 's': true, 765 't': true, 766 'u': true, 767 'v': true, 768 'w': true, 769 'x': true, 770 'y': true, 771 'z': true, 772 '|': true, 773 '~': true, 774 }