github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/net/textproto/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package textproto 6 7 import ( 8 "bufio" 9 "bytes" 10 "io" 11 "io/ioutil" 12 "strconv" 13 "strings" 14 ) 15 16 // A Reader implements convenience methods for reading requests 17 // or responses from a text protocol network connection. 18 type Reader struct { 19 R *bufio.Reader 20 dot *dotReader 21 buf []byte // a re-usable buffer for readContinuedLineSlice 22 } 23 24 // NewReader returns a new Reader reading from r. 25 // 26 // To avoid denial of service attacks, the provided bufio.Reader 27 // should be reading from an io.LimitReader or similar Reader to bound 28 // the size of responses. 29 func NewReader(r *bufio.Reader) *Reader { 30 return &Reader{R: r} 31 } 32 33 // ReadLine reads a single line from r, 34 // eliding the final \n or \r\n from the returned string. 35 func (r *Reader) ReadLine() (string, error) { 36 line, err := r.readLineSlice() 37 return string(line), err 38 } 39 40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string. 41 func (r *Reader) ReadLineBytes() ([]byte, error) { 42 line, err := r.readLineSlice() 43 if line != nil { 44 buf := make([]byte, len(line)) 45 copy(buf, line) 46 line = buf 47 } 48 return line, err 49 } 50 51 func (r *Reader) readLineSlice() ([]byte, error) { 52 r.closeDot() 53 var line []byte 54 for { 55 l, more, err := r.R.ReadLine() 56 if err != nil { 57 return nil, err 58 } 59 // Avoid the copy if the first call produced a full line. 60 if line == nil && !more { 61 return l, nil 62 } 63 line = append(line, l...) 64 if !more { 65 break 66 } 67 } 68 return line, nil 69 } 70 71 // ReadContinuedLine reads a possibly continued line from r, 72 // eliding the final trailing ASCII white space. 
73 // Lines after the first are considered continuations if they 74 // begin with a space or tab character. In the returned data, 75 // continuation lines are separated from the previous line 76 // only by a single space: the newline and leading white space 77 // are removed. 78 // 79 // For example, consider this input: 80 // 81 // Line 1 82 // continued... 83 // Line 2 84 // 85 // The first call to ReadContinuedLine will return "Line 1 continued..." 86 // and the second will return "Line 2". 87 // 88 // A line consisting of only white space is never continued. 89 // 90 func (r *Reader) ReadContinuedLine() (string, error) { 91 line, err := r.readContinuedLineSlice() 92 return string(line), err 93 } 94 95 // trim returns s with leading and trailing spaces and tabs removed. 96 // It does not assume Unicode or UTF-8. 97 func trim(s []byte) []byte { 98 i := 0 99 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 100 i++ 101 } 102 n := len(s) 103 for n > i && (s[n-1] == ' ' || s[n-1] == '\t') { 104 n-- 105 } 106 return s[i:n] 107 } 108 109 // ReadContinuedLineBytes is like ReadContinuedLine but 110 // returns a []byte instead of a string. 111 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) { 112 line, err := r.readContinuedLineSlice() 113 if line != nil { 114 buf := make([]byte, len(line)) 115 copy(buf, line) 116 line = buf 117 } 118 return line, err 119 } 120 121 func (r *Reader) readContinuedLineSlice() ([]byte, error) { 122 // Read the first line. 123 line, err := r.readLineSlice() 124 if err != nil { 125 return nil, err 126 } 127 if len(line) == 0 { // blank line - no continuation 128 return line, nil 129 } 130 131 // Optimistically assume that we have started to buffer the next line 132 // and it starts with an ASCII letter (the next header key), or a blank 133 // line, so we can avoid copying that buffered data around in memory 134 // and skipping over non-existent whitespace. 
135 if r.R.Buffered() > 1 { 136 peek, _ := r.R.Peek(2) 137 if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') || 138 len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' { 139 return trim(line), nil 140 } 141 } 142 143 // ReadByte or the next readLineSlice will flush the read buffer; 144 // copy the slice into buf. 145 r.buf = append(r.buf[:0], trim(line)...) 146 147 // Read continuation lines. 148 for r.skipSpace() > 0 { 149 line, err := r.readLineSlice() 150 if err != nil { 151 break 152 } 153 r.buf = append(r.buf, ' ') 154 r.buf = append(r.buf, trim(line)...) 155 } 156 return r.buf, nil 157 } 158 159 // skipSpace skips R over all spaces and returns the number of bytes skipped. 160 func (r *Reader) skipSpace() int { 161 n := 0 162 for { 163 c, err := r.R.ReadByte() 164 if err != nil { 165 // Bufio will keep err until next read. 166 break 167 } 168 if c != ' ' && c != '\t' { 169 r.R.UnreadByte() 170 break 171 } 172 n++ 173 } 174 return n 175 } 176 177 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) { 178 line, err := r.ReadLine() 179 if err != nil { 180 return 181 } 182 return parseCodeLine(line, expectCode) 183 } 184 185 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) { 186 if len(line) < 4 || line[3] != ' ' && line[3] != '-' { 187 err = ProtocolError("short response: " + line) 188 return 189 } 190 continued = line[3] == '-' 191 code, err = strconv.Atoi(line[0:3]) 192 if err != nil || code < 100 { 193 err = ProtocolError("invalid response code: " + line) 194 return 195 } 196 message = line[4:] 197 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode || 198 10 <= expectCode && expectCode < 100 && code/10 != expectCode || 199 100 <= expectCode && expectCode < 1000 && code != expectCode { 200 err = &Error{code, message} 201 } 202 return 203 } 204 205 // ReadCodeLine reads a response code line of the form 206 // code message 207 // where 
code is a three-digit status code and the message 208 // extends to the rest of the line. An example of such a line is: 209 // 220 plan9.bell-labs.com ESMTP 210 // 211 // If the prefix of the status does not match the digits in expectCode, 212 // ReadCodeLine returns with err set to &Error{code, message}. 213 // For example, if expectCode is 31, an error will be returned if 214 // the status is not in the range [310,319]. 215 // 216 // If the response is multi-line, ReadCodeLine returns an error. 217 // 218 // An expectCode <= 0 disables the check of the status code. 219 // 220 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) { 221 code, continued, message, err := r.readCodeLine(expectCode) 222 if err == nil && continued { 223 err = ProtocolError("unexpected multi-line response: " + message) 224 } 225 return 226 } 227 228 // ReadResponse reads a multi-line response of the form: 229 // 230 // code-message line 1 231 // code-message line 2 232 // ... 233 // code message line n 234 // 235 // where code is a three-digit status code. The first line starts with the 236 // code and a hyphen. The response is terminated by a line that starts 237 // with the same code followed by a space. Each line in message is 238 // separated by a newline (\n). 239 // 240 // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for 241 // details of another form of response accepted: 242 // 243 // code-message line 1 244 // message line 2 245 // ... 246 // code message line n 247 // 248 // If the prefix of the status does not match the digits in expectCode, 249 // ReadResponse returns with err set to &Error{code, message}. 250 // For example, if expectCode is 31, an error will be returned if 251 // the status is not in the range [310,319]. 252 // 253 // An expectCode <= 0 disables the check of the status code. 
254 // 255 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) { 256 code, continued, message, err := r.readCodeLine(expectCode) 257 multi := continued 258 for continued { 259 line, err := r.ReadLine() 260 if err != nil { 261 return 0, "", err 262 } 263 264 var code2 int 265 var moreMessage string 266 code2, continued, moreMessage, err = parseCodeLine(line, 0) 267 if err != nil || code2 != code { 268 message += "\n" + strings.TrimRight(line, "\r\n") 269 continued = true 270 continue 271 } 272 message += "\n" + moreMessage 273 } 274 if err != nil && multi && message != "" { 275 // replace one line error message with all lines (full message) 276 err = &Error{code, message} 277 } 278 return 279 } 280 281 // DotReader returns a new Reader that satisfies Reads using the 282 // decoded text of a dot-encoded block read from r. 283 // The returned Reader is only valid until the next call 284 // to a method on r. 285 // 286 // Dot encoding is a common framing used for data blocks 287 // in text protocols such as SMTP. The data consists of a sequence 288 // of lines, each of which ends in "\r\n". The sequence itself 289 // ends at a line containing just a dot: ".\r\n". Lines beginning 290 // with a dot are escaped with an additional dot to avoid 291 // looking like the end of the sequence. 292 // 293 // The decoded form returned by the Reader's Read method 294 // rewrites the "\r\n" line endings into the simpler "\n", 295 // removes leading dot escapes if present, and stops with error io.EOF 296 // after consuming (and discarding) the end-of-sequence line. 297 func (r *Reader) DotReader() io.Reader { 298 r.closeDot() 299 r.dot = &dotReader{r: r} 300 return r.dot 301 } 302 303 type dotReader struct { 304 r *Reader 305 state int 306 } 307 308 // Read satisfies reads by decoding dot-encoded data read from d.r. 
// It fills b with decoded bytes and returns how many were written.
// io.EOF is returned only once the terminating dot line has been consumed;
// if the underlying reader reports io.EOF before that, the error becomes
// io.ErrUnexpectedEOF. On any error (including io.EOF) d detaches itself
// from d.r, provided it is still the Reader's current dot reader.
func (d *dotReader) Read(b []byte) (n int, err error) {
	// Run data through a simple state machine to
	// elide leading dots, rewrite trailing \r\n into \n,
	// and detect ending .\r\n line.
	const (
		stateBeginLine = iota // beginning of line; initial state; must be zero
		stateDot              // read . at beginning of line
		stateDotCR            // read .\r at beginning of line
		stateCR               // read \r (possibly at end of line)
		stateData             // reading data in middle of line
		stateEOF              // reached .\r\n end marker line
	)
	br := d.r.R
	for n < len(b) && d.state != stateEOF {
		var c byte
		c, err = br.ReadByte()
		if err != nil {
			// Running out of input before the end marker is a truncated block.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			break
		}
		// Within the switch, "continue" swallows c (nothing is emitted),
		// while falling out of the switch emits c into b.
		switch d.state {
		case stateBeginLine:
			if c == '.' {
				d.state = stateDot
				continue
			}
			if c == '\r' {
				d.state = stateCR
				continue
			}
			// NOTE(review): a bare '\n' here is emitted but leaves the state
			// at stateData, not stateBeginLine, so a '.' starting the next
			// line is treated as ordinary data — confirm this is intended.
			d.state = stateData

		case stateDot:
			if c == '\r' {
				d.state = stateDotCR
				continue
			}
			if c == '\n' {
				// ".\n" is accepted as the end marker too.
				d.state = stateEOF
				continue
			}
			// Any other byte: the leading dot was an escape and is dropped.
			d.state = stateData

		case stateDotCR:
			if c == '\n' {
				d.state = stateEOF
				continue
			}
			// Not part of .\r\n.
			// Consume leading dot and emit saved \r.
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateCR:
			if c == '\n' {
				d.state = stateBeginLine
				// break leaves the switch only, so the '\n' is emitted below
				// (the preceding '\r' was never emitted).
				break
			}
			// Not part of \r\n. Emit saved \r
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateData:
			if c == '\r' {
				d.state = stateCR
				continue
			}
			if c == '\n' {
				d.state = stateBeginLine
			}
		}
		b[n] = c
		n++
	}
	if err == nil && d.state == stateEOF {
		err = io.EOF
	}
	// Detach from the Reader so closeDot stops draining this dotReader.
	if err != nil && d.r.dot == d {
		d.r.dot = nil
	}
	return
}

// closeDot drains the current DotReader if any,
// making sure that it reads until the ending dot line.
398 func (r *Reader) closeDot() { 399 if r.dot == nil { 400 return 401 } 402 buf := make([]byte, 128) 403 for r.dot != nil { 404 // When Read reaches EOF or an error, 405 // it will set r.dot == nil. 406 r.dot.Read(buf) 407 } 408 } 409 410 // ReadDotBytes reads a dot-encoding and returns the decoded data. 411 // 412 // See the documentation for the DotReader method for details about dot-encoding. 413 func (r *Reader) ReadDotBytes() ([]byte, error) { 414 return ioutil.ReadAll(r.DotReader()) 415 } 416 417 // ReadDotLines reads a dot-encoding and returns a slice 418 // containing the decoded lines, with the final \r\n or \n elided from each. 419 // 420 // See the documentation for the DotReader method for details about dot-encoding. 421 func (r *Reader) ReadDotLines() ([]string, error) { 422 // We could use ReadDotBytes and then Split it, 423 // but reading a line at a time avoids needing a 424 // large contiguous block of memory and is simpler. 425 var v []string 426 var err error 427 for { 428 var line string 429 line, err = r.ReadLine() 430 if err != nil { 431 if err == io.EOF { 432 err = io.ErrUnexpectedEOF 433 } 434 break 435 } 436 437 // Dot by itself marks end; otherwise cut one dot. 438 if len(line) > 0 && line[0] == '.' { 439 if len(line) == 1 { 440 break 441 } 442 line = line[1:] 443 } 444 v = append(v, line) 445 } 446 return v, err 447 } 448 449 // ReadMIMEHeader reads a MIME-style header from r. 450 // The header is a sequence of possibly continued Key: Value lines 451 // ending in a blank line. 452 // The returned map m maps CanonicalMIMEHeaderKey(key) to a 453 // sequence of values in the same order encountered in the input. 
454 // 455 // For example, consider this input: 456 // 457 // My-Key: Value 1 458 // Long-Key: Even 459 // Longer Value 460 // My-Key: Value 2 461 // 462 // Given that input, ReadMIMEHeader returns the map: 463 // 464 // map[string][]string{ 465 // "My-Key": {"Value 1", "Value 2"}, 466 // "Long-Key": {"Even Longer Value"}, 467 // } 468 // 469 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { 470 // Avoid lots of small slice allocations later by allocating one 471 // large one ahead of time which we'll cut up into smaller 472 // slices. If this isn't big enough later, we allocate small ones. 473 var strs []string 474 hint := r.upcomingHeaderNewlines() 475 if hint > 0 { 476 strs = make([]string, hint) 477 } 478 479 m := make(MIMEHeader, hint) 480 481 // The first line cannot start with a leading space. 482 if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') { 483 line, err := r.readLineSlice() 484 if err != nil { 485 return m, err 486 } 487 return m, ProtocolError("malformed MIME header initial line: " + string(line)) 488 } 489 490 for { 491 kv, err := r.readContinuedLineSlice() 492 if len(kv) == 0 { 493 return m, err 494 } 495 496 // Key ends at first colon; should not have trailing spaces 497 // but they appear in the wild, violating specs, so we remove 498 // them if present. 499 i := bytes.IndexByte(kv, ':') 500 if i < 0 { 501 return m, ProtocolError("malformed MIME header line: " + string(kv)) 502 } 503 endKey := i 504 for endKey > 0 && kv[endKey-1] == ' ' { 505 endKey-- 506 } 507 key := canonicalMIMEHeaderKey(kv[:endKey]) 508 509 // As per RFC 7230 field-name is a token, tokens consist of one or more chars. 510 // We could return a ProtocolError here, but better to be liberal in what we 511 // accept, so if we get an empty key, skip it. 512 if key == "" { 513 continue 514 } 515 516 // Skip initial spaces in value. 
517 i++ // skip colon 518 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') { 519 i++ 520 } 521 value := string(kv[i:]) 522 523 vv := m[key] 524 if vv == nil && len(strs) > 0 { 525 // More than likely this will be a single-element key. 526 // Most headers aren't multi-valued. 527 // Set the capacity on strs[0] to 1, so any future append 528 // won't extend the slice into the other strings. 529 vv, strs = strs[:1:1], strs[1:] 530 vv[0] = value 531 m[key] = vv 532 } else { 533 m[key] = append(vv, value) 534 } 535 536 if err != nil { 537 return m, err 538 } 539 } 540 } 541 542 // upcomingHeaderNewlines returns an approximation of the number of newlines 543 // that will be in this header. If it gets confused, it returns 0. 544 func (r *Reader) upcomingHeaderNewlines() (n int) { 545 // Try to determine the 'hint' size. 546 r.R.Peek(1) // force a buffer load if empty 547 s := r.R.Buffered() 548 if s == 0 { 549 return 550 } 551 peek, _ := r.R.Peek(s) 552 for len(peek) > 0 { 553 i := bytes.IndexByte(peek, '\n') 554 if i < 3 { 555 // Not present (-1) or found within the next few bytes, 556 // implying we're at the end ("\r\n\r\n" or "\n\n") 557 return 558 } 559 n++ 560 peek = peek[i+1:] 561 } 562 return 563 } 564 565 // CanonicalMIMEHeaderKey returns the canonical format of the 566 // MIME header key s. The canonicalization converts the first 567 // letter and any letter following a hyphen to upper case; 568 // the rest are converted to lowercase. For example, the 569 // canonical key for "accept-encoding" is "Accept-Encoding". 570 // MIME header keys are assumed to be ASCII only. 571 // If s contains a space or invalid header field bytes, it is 572 // returned without modifications. 573 func CanonicalMIMEHeaderKey(s string) string { 574 // Quick check for canonical encoding. 
575 upper := true 576 for i := 0; i < len(s); i++ { 577 c := s[i] 578 if !validHeaderFieldByte(c) { 579 return s 580 } 581 if upper && 'a' <= c && c <= 'z' { 582 return canonicalMIMEHeaderKey([]byte(s)) 583 } 584 if !upper && 'A' <= c && c <= 'Z' { 585 return canonicalMIMEHeaderKey([]byte(s)) 586 } 587 upper = c == '-' 588 } 589 return s 590 } 591 592 const toLower = 'a' - 'A' 593 594 // validHeaderFieldByte reports whether b is a valid byte in a header 595 // field name. RFC 7230 says: 596 // header-field = field-name ":" OWS field-value OWS 597 // field-name = token 598 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 599 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 600 // token = 1*tchar 601 func validHeaderFieldByte(b byte) bool { 602 return int(b) < len(isTokenTable) && isTokenTable[b] 603 } 604 605 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is 606 // allowed to mutate the provided byte slice before returning the 607 // string. 608 // 609 // For invalid inputs (if a contains spaces or non-token bytes), a 610 // is unchanged and a string copy is returned. 611 func canonicalMIMEHeaderKey(a []byte) string { 612 // See if a looks like a header key. If not, return it unchanged. 613 for _, c := range a { 614 if validHeaderFieldByte(c) { 615 continue 616 } 617 // Don't canonicalize. 618 return string(a) 619 } 620 621 upper := true 622 for i, c := range a { 623 // Canonicalize: first letter upper case 624 // and upper case after each dash. 625 // (Host, User-Agent, If-Modified-Since). 626 // MIME headers are ASCII only, so no Unicode issues. 
627 if upper && 'a' <= c && c <= 'z' { 628 c -= toLower 629 } else if !upper && 'A' <= c && c <= 'Z' { 630 c += toLower 631 } 632 a[i] = c 633 upper = c == '-' // for next time 634 } 635 // The compiler recognizes m[string(byteSlice)] as a special 636 // case, so a copy of a's bytes into a new string does not 637 // happen in this map lookup: 638 if v := commonHeader[string(a)]; v != "" { 639 return v 640 } 641 return string(a) 642 } 643 644 // commonHeader interns common header strings. 645 var commonHeader = make(map[string]string) 646 647 func init() { 648 for _, v := range []string{ 649 "Accept", 650 "Accept-Charset", 651 "Accept-Encoding", 652 "Accept-Language", 653 "Accept-Ranges", 654 "Cache-Control", 655 "Cc", 656 "Connection", 657 "Content-Id", 658 "Content-Language", 659 "Content-Length", 660 "Content-Transfer-Encoding", 661 "Content-Type", 662 "Cookie", 663 "Date", 664 "Dkim-Signature", 665 "Etag", 666 "Expires", 667 "From", 668 "Host", 669 "If-Modified-Since", 670 "If-None-Match", 671 "In-Reply-To", 672 "Last-Modified", 673 "Location", 674 "Message-Id", 675 "Mime-Version", 676 "Pragma", 677 "Received", 678 "Return-Path", 679 "Server", 680 "Set-Cookie", 681 "Subject", 682 "To", 683 "User-Agent", 684 "Via", 685 "X-Forwarded-For", 686 "X-Imforwards", 687 "X-Powered-By", 688 } { 689 commonHeader[v] = v 690 } 691 } 692 693 // isTokenTable is a copy of net/http/lex.go's isTokenTable. 
// See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
//
// The entry for a byte is true exactly when that byte appears in the
// alphabet below: the listed punctuation, the ASCII digits and the ASCII
// letters.
var isTokenTable = func() (table [127]bool) {
	const alphabet = "!#$%&'*+-.^_`|~" +
		"0123456789" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"abcdefghijklmnopqrstuvwxyz"
	for i := 0; i < len(alphabet); i++ {
		table[alphabet[i]] = true
	}
	return table
}()