github.com/aloncn/graphics-go@v0.0.1/src/net/textproto/reader.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package textproto 6 7 import ( 8 "bufio" 9 "bytes" 10 "io" 11 "io/ioutil" 12 "strconv" 13 "strings" 14 ) 15 16 // A Reader implements convenience methods for reading requests 17 // or responses from a text protocol network connection. 18 type Reader struct { 19 R *bufio.Reader 20 dot *dotReader 21 buf []byte // a re-usable buffer for readContinuedLineSlice 22 } 23 24 // NewReader returns a new Reader reading from r. 25 // 26 // To avoid denial of service attacks, the provided bufio.Reader 27 // should be reading from an io.LimitReader or similar Reader to bound 28 // the size of responses. 29 func NewReader(r *bufio.Reader) *Reader { 30 return &Reader{R: r} 31 } 32 33 // ReadLine reads a single line from r, 34 // eliding the final \n or \r\n from the returned string. 35 func (r *Reader) ReadLine() (string, error) { 36 line, err := r.readLineSlice() 37 return string(line), err 38 } 39 40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string. 41 func (r *Reader) ReadLineBytes() ([]byte, error) { 42 line, err := r.readLineSlice() 43 if line != nil { 44 buf := make([]byte, len(line)) 45 copy(buf, line) 46 line = buf 47 } 48 return line, err 49 } 50 51 func (r *Reader) readLineSlice() ([]byte, error) { 52 r.closeDot() 53 var line []byte 54 for { 55 l, more, err := r.R.ReadLine() 56 if err != nil { 57 return nil, err 58 } 59 // Avoid the copy if the first call produced a full line. 60 if line == nil && !more { 61 return l, nil 62 } 63 line = append(line, l...) 64 if !more { 65 break 66 } 67 } 68 return line, nil 69 } 70 71 // ReadContinuedLine reads a possibly continued line from r, 72 // eliding the final trailing ASCII white space. 
73 // Lines after the first are considered continuations if they 74 // begin with a space or tab character. In the returned data, 75 // continuation lines are separated from the previous line 76 // only by a single space: the newline and leading white space 77 // are removed. 78 // 79 // For example, consider this input: 80 // 81 // Line 1 82 // continued... 83 // Line 2 84 // 85 // The first call to ReadContinuedLine will return "Line 1 continued..." 86 // and the second will return "Line 2". 87 // 88 // A line consisting of only white space is never continued. 89 // 90 func (r *Reader) ReadContinuedLine() (string, error) { 91 line, err := r.readContinuedLineSlice() 92 return string(line), err 93 } 94 95 // trim returns s with leading and trailing spaces and tabs removed. 96 // It does not assume Unicode or UTF-8. 97 func trim(s []byte) []byte { 98 i := 0 99 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 100 i++ 101 } 102 n := len(s) 103 for n > i && (s[n-1] == ' ' || s[n-1] == '\t') { 104 n-- 105 } 106 return s[i:n] 107 } 108 109 // ReadContinuedLineBytes is like ReadContinuedLine but 110 // returns a []byte instead of a string. 111 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) { 112 line, err := r.readContinuedLineSlice() 113 if line != nil { 114 buf := make([]byte, len(line)) 115 copy(buf, line) 116 line = buf 117 } 118 return line, err 119 } 120 121 func (r *Reader) readContinuedLineSlice() ([]byte, error) { 122 // Read the first line. 123 line, err := r.readLineSlice() 124 if err != nil { 125 return nil, err 126 } 127 if len(line) == 0 { // blank line - no continuation 128 return line, nil 129 } 130 131 // Optimistically assume that we have started to buffer the next line 132 // and it starts with an ASCII letter (the next header key), so we can 133 // avoid copying that buffered data around in memory and skipping over 134 // non-existent whitespace. 
135 if r.R.Buffered() > 1 { 136 peek, err := r.R.Peek(1) 137 if err == nil && isASCIILetter(peek[0]) { 138 return trim(line), nil 139 } 140 } 141 142 // ReadByte or the next readLineSlice will flush the read buffer; 143 // copy the slice into buf. 144 r.buf = append(r.buf[:0], trim(line)...) 145 146 // Read continuation lines. 147 for r.skipSpace() > 0 { 148 line, err := r.readLineSlice() 149 if err != nil { 150 break 151 } 152 r.buf = append(r.buf, ' ') 153 r.buf = append(r.buf, trim(line)...) 154 } 155 return r.buf, nil 156 } 157 158 // skipSpace skips R over all spaces and returns the number of bytes skipped. 159 func (r *Reader) skipSpace() int { 160 n := 0 161 for { 162 c, err := r.R.ReadByte() 163 if err != nil { 164 // Bufio will keep err until next read. 165 break 166 } 167 if c != ' ' && c != '\t' { 168 r.R.UnreadByte() 169 break 170 } 171 n++ 172 } 173 return n 174 } 175 176 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) { 177 line, err := r.ReadLine() 178 if err != nil { 179 return 180 } 181 return parseCodeLine(line, expectCode) 182 } 183 184 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) { 185 if len(line) < 4 || line[3] != ' ' && line[3] != '-' { 186 err = ProtocolError("short response: " + line) 187 return 188 } 189 continued = line[3] == '-' 190 code, err = strconv.Atoi(line[0:3]) 191 if err != nil || code < 100 { 192 err = ProtocolError("invalid response code: " + line) 193 return 194 } 195 message = line[4:] 196 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode || 197 10 <= expectCode && expectCode < 100 && code/10 != expectCode || 198 100 <= expectCode && expectCode < 1000 && code != expectCode { 199 err = &Error{code, message} 200 } 201 return 202 } 203 204 // ReadCodeLine reads a response code line of the form 205 // code message 206 // where code is a three-digit status code and the message 207 // extends to the rest of the 
line. An example of such a line is: 208 // 220 plan9.bell-labs.com ESMTP 209 // 210 // If the prefix of the status does not match the digits in expectCode, 211 // ReadCodeLine returns with err set to &Error{code, message}. 212 // For example, if expectCode is 31, an error will be returned if 213 // the status is not in the range [310,319]. 214 // 215 // If the response is multi-line, ReadCodeLine returns an error. 216 // 217 // An expectCode <= 0 disables the check of the status code. 218 // 219 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) { 220 code, continued, message, err := r.readCodeLine(expectCode) 221 if err == nil && continued { 222 err = ProtocolError("unexpected multi-line response: " + message) 223 } 224 return 225 } 226 227 // ReadResponse reads a multi-line response of the form: 228 // 229 // code-message line 1 230 // code-message line 2 231 // ... 232 // code message line n 233 // 234 // where code is a three-digit status code. The first line starts with the 235 // code and a hyphen. The response is terminated by a line that starts 236 // with the same code followed by a space. Each line in message is 237 // separated by a newline (\n). 238 // 239 // See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for 240 // details of another form of response accepted: 241 // 242 // code-message line 1 243 // message line 2 244 // ... 245 // code message line n 246 // 247 // If the prefix of the status does not match the digits in expectCode, 248 // ReadResponse returns with err set to &Error{code, message}. 249 // For example, if expectCode is 31, an error will be returned if 250 // the status is not in the range [310,319]. 251 // 252 // An expectCode <= 0 disables the check of the status code. 
253 // 254 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) { 255 code, continued, message, err := r.readCodeLine(expectCode) 256 multi := continued 257 for continued { 258 line, err := r.ReadLine() 259 if err != nil { 260 return 0, "", err 261 } 262 263 var code2 int 264 var moreMessage string 265 code2, continued, moreMessage, err = parseCodeLine(line, 0) 266 if err != nil || code2 != code { 267 message += "\n" + strings.TrimRight(line, "\r\n") 268 continued = true 269 continue 270 } 271 message += "\n" + moreMessage 272 } 273 if err != nil && multi && message != "" { 274 // replace one line error message with all lines (full message) 275 err = &Error{code, message} 276 } 277 return 278 } 279 280 // DotReader returns a new Reader that satisfies Reads using the 281 // decoded text of a dot-encoded block read from r. 282 // The returned Reader is only valid until the next call 283 // to a method on r. 284 // 285 // Dot encoding is a common framing used for data blocks 286 // in text protocols such as SMTP. The data consists of a sequence 287 // of lines, each of which ends in "\r\n". The sequence itself 288 // ends at a line containing just a dot: ".\r\n". Lines beginning 289 // with a dot are escaped with an additional dot to avoid 290 // looking like the end of the sequence. 291 // 292 // The decoded form returned by the Reader's Read method 293 // rewrites the "\r\n" line endings into the simpler "\n", 294 // removes leading dot escapes if present, and stops with error io.EOF 295 // after consuming (and discarding) the end-of-sequence line. 296 func (r *Reader) DotReader() io.Reader { 297 r.closeDot() 298 r.dot = &dotReader{r: r} 299 return r.dot 300 } 301 302 type dotReader struct { 303 r *Reader 304 state int 305 } 306 307 // Read satisfies reads by decoding dot-encoded data read from d.r. 
308 func (d *dotReader) Read(b []byte) (n int, err error) { 309 // Run data through a simple state machine to 310 // elide leading dots, rewrite trailing \r\n into \n, 311 // and detect ending .\r\n line. 312 const ( 313 stateBeginLine = iota // beginning of line; initial state; must be zero 314 stateDot // read . at beginning of line 315 stateDotCR // read .\r at beginning of line 316 stateCR // read \r (possibly at end of line) 317 stateData // reading data in middle of line 318 stateEOF // reached .\r\n end marker line 319 ) 320 br := d.r.R 321 for n < len(b) && d.state != stateEOF { 322 var c byte 323 c, err = br.ReadByte() 324 if err != nil { 325 if err == io.EOF { 326 err = io.ErrUnexpectedEOF 327 } 328 break 329 } 330 switch d.state { 331 case stateBeginLine: 332 if c == '.' { 333 d.state = stateDot 334 continue 335 } 336 if c == '\r' { 337 d.state = stateCR 338 continue 339 } 340 d.state = stateData 341 342 case stateDot: 343 if c == '\r' { 344 d.state = stateDotCR 345 continue 346 } 347 if c == '\n' { 348 d.state = stateEOF 349 continue 350 } 351 d.state = stateData 352 353 case stateDotCR: 354 if c == '\n' { 355 d.state = stateEOF 356 continue 357 } 358 // Not part of .\r\n. 359 // Consume leading dot and emit saved \r. 360 br.UnreadByte() 361 c = '\r' 362 d.state = stateData 363 364 case stateCR: 365 if c == '\n' { 366 d.state = stateBeginLine 367 break 368 } 369 // Not part of \r\n. Emit saved \r 370 br.UnreadByte() 371 c = '\r' 372 d.state = stateData 373 374 case stateData: 375 if c == '\r' { 376 d.state = stateCR 377 continue 378 } 379 if c == '\n' { 380 d.state = stateBeginLine 381 } 382 } 383 b[n] = c 384 n++ 385 } 386 if err == nil && d.state == stateEOF { 387 err = io.EOF 388 } 389 if err != nil && d.r.dot == d { 390 d.r.dot = nil 391 } 392 return 393 } 394 395 // closeDot drains the current DotReader if any, 396 // making sure that it reads until the ending dot line. 
397 func (r *Reader) closeDot() { 398 if r.dot == nil { 399 return 400 } 401 buf := make([]byte, 128) 402 for r.dot != nil { 403 // When Read reaches EOF or an error, 404 // it will set r.dot == nil. 405 r.dot.Read(buf) 406 } 407 } 408 409 // ReadDotBytes reads a dot-encoding and returns the decoded data. 410 // 411 // See the documentation for the DotReader method for details about dot-encoding. 412 func (r *Reader) ReadDotBytes() ([]byte, error) { 413 return ioutil.ReadAll(r.DotReader()) 414 } 415 416 // ReadDotLines reads a dot-encoding and returns a slice 417 // containing the decoded lines, with the final \r\n or \n elided from each. 418 // 419 // See the documentation for the DotReader method for details about dot-encoding. 420 func (r *Reader) ReadDotLines() ([]string, error) { 421 // We could use ReadDotBytes and then Split it, 422 // but reading a line at a time avoids needing a 423 // large contiguous block of memory and is simpler. 424 var v []string 425 var err error 426 for { 427 var line string 428 line, err = r.ReadLine() 429 if err != nil { 430 if err == io.EOF { 431 err = io.ErrUnexpectedEOF 432 } 433 break 434 } 435 436 // Dot by itself marks end; otherwise cut one dot. 437 if len(line) > 0 && line[0] == '.' { 438 if len(line) == 1 { 439 break 440 } 441 line = line[1:] 442 } 443 v = append(v, line) 444 } 445 return v, err 446 } 447 448 // ReadMIMEHeader reads a MIME-style header from r. 449 // The header is a sequence of possibly continued Key: Value lines 450 // ending in a blank line. 451 // The returned map m maps CanonicalMIMEHeaderKey(key) to a 452 // sequence of values in the same order encountered in the input. 
453 // 454 // For example, consider this input: 455 // 456 // My-Key: Value 1 457 // Long-Key: Even 458 // Longer Value 459 // My-Key: Value 2 460 // 461 // Given that input, ReadMIMEHeader returns the map: 462 // 463 // map[string][]string{ 464 // "My-Key": {"Value 1", "Value 2"}, 465 // "Long-Key": {"Even Longer Value"}, 466 // } 467 // 468 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { 469 // Avoid lots of small slice allocations later by allocating one 470 // large one ahead of time which we'll cut up into smaller 471 // slices. If this isn't big enough later, we allocate small ones. 472 var strs []string 473 hint := r.upcomingHeaderNewlines() 474 if hint > 0 { 475 strs = make([]string, hint) 476 } 477 478 m := make(MIMEHeader, hint) 479 for { 480 kv, err := r.readContinuedLineSlice() 481 if len(kv) == 0 { 482 return m, err 483 } 484 485 // Key ends at first colon; should not have spaces but 486 // they appear in the wild, violating specs, so we 487 // remove them if present. 488 i := bytes.IndexByte(kv, ':') 489 if i < 0 { 490 return m, ProtocolError("malformed MIME header line: " + string(kv)) 491 } 492 endKey := i 493 for endKey > 0 && kv[endKey-1] == ' ' { 494 endKey-- 495 } 496 key := canonicalMIMEHeaderKey(kv[:endKey]) 497 498 // As per RFC 7230 field-name is a token, tokens consist of one or more chars. 499 // We could return a ProtocolError here, but better to be liberal in what we 500 // accept, so if we get an empty key, skip it. 501 if key == "" { 502 continue 503 } 504 505 // Skip initial spaces in value. 506 i++ // skip colon 507 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') { 508 i++ 509 } 510 value := string(kv[i:]) 511 512 vv := m[key] 513 if vv == nil && len(strs) > 0 { 514 // More than likely this will be a single-element key. 515 // Most headers aren't multi-valued. 516 // Set the capacity on strs[0] to 1, so any future append 517 // won't extend the slice into the other strings. 
518 vv, strs = strs[:1:1], strs[1:] 519 vv[0] = value 520 m[key] = vv 521 } else { 522 m[key] = append(vv, value) 523 } 524 525 if err != nil { 526 return m, err 527 } 528 } 529 } 530 531 // upcomingHeaderNewlines returns an approximation of the number of newlines 532 // that will be in this header. If it gets confused, it returns 0. 533 func (r *Reader) upcomingHeaderNewlines() (n int) { 534 // Try to determine the 'hint' size. 535 r.R.Peek(1) // force a buffer load if empty 536 s := r.R.Buffered() 537 if s == 0 { 538 return 539 } 540 peek, _ := r.R.Peek(s) 541 for len(peek) > 0 { 542 i := bytes.IndexByte(peek, '\n') 543 if i < 3 { 544 // Not present (-1) or found within the next few bytes, 545 // implying we're at the end ("\r\n\r\n" or "\n\n") 546 return 547 } 548 n++ 549 peek = peek[i+1:] 550 } 551 return 552 } 553 554 // CanonicalMIMEHeaderKey returns the canonical format of the 555 // MIME header key s. The canonicalization converts the first 556 // letter and any letter following a hyphen to upper case; 557 // the rest are converted to lowercase. For example, the 558 // canonical key for "accept-encoding" is "Accept-Encoding". 559 // MIME header keys are assumed to be ASCII only. 560 // If s contains a space or invalid header field bytes, it is 561 // returned without modifications. 562 func CanonicalMIMEHeaderKey(s string) string { 563 // Quick check for canonical encoding. 564 upper := true 565 for i := 0; i < len(s); i++ { 566 c := s[i] 567 if !validHeaderFieldByte(c) { 568 return s 569 } 570 if upper && 'a' <= c && c <= 'z' { 571 return canonicalMIMEHeaderKey([]byte(s)) 572 } 573 if !upper && 'A' <= c && c <= 'Z' { 574 return canonicalMIMEHeaderKey([]byte(s)) 575 } 576 upper = c == '-' 577 } 578 return s 579 } 580 581 const toLower = 'a' - 'A' 582 583 // validHeaderFieldByte reports whether b is a valid byte in a header 584 // field key. This is actually stricter than RFC 7230, which says: 585 // tchar = "!" 
/ "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 586 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 587 // token = 1*tchar 588 // TODO: revisit in Go 1.6+ and possibly expand this. But note that many 589 // servers have historically dropped '_' to prevent ambiguities when mapping 590 // to CGI environment variables. 591 func validHeaderFieldByte(b byte) bool { 592 return ('A' <= b && b <= 'Z') || 593 ('a' <= b && b <= 'z') || 594 ('0' <= b && b <= '9') || 595 b == '-' 596 } 597 598 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is 599 // allowed to mutate the provided byte slice before returning the 600 // string. 601 // 602 // For invalid inputs (if a contains spaces or non-token bytes), a 603 // is unchanged and a string copy is returned. 604 func canonicalMIMEHeaderKey(a []byte) string { 605 // See if a looks like a header key. If not, return it unchanged. 606 for _, c := range a { 607 if validHeaderFieldByte(c) { 608 continue 609 } 610 // Don't canonicalize. 611 return string(a) 612 } 613 614 upper := true 615 for i, c := range a { 616 // Canonicalize: first letter upper case 617 // and upper case after each dash. 618 // (Host, User-Agent, If-Modified-Since). 619 // MIME headers are ASCII only, so no Unicode issues. 620 if upper && 'a' <= c && c <= 'z' { 621 c -= toLower 622 } else if !upper && 'A' <= c && c <= 'Z' { 623 c += toLower 624 } 625 a[i] = c 626 upper = c == '-' // for next time 627 } 628 // The compiler recognizes m[string(byteSlice)] as a special 629 // case, so a copy of a's bytes into a new string does not 630 // happen in this map lookup: 631 if v := commonHeader[string(a)]; v != "" { 632 return v 633 } 634 return string(a) 635 } 636 637 // commonHeader interns common header strings. 
var commonHeader = make(map[string]string)

// init fills commonHeader so that every listed header name maps to itself.
func init() {
	for _, v := range []string{
		"Accept",
		"Accept-Charset",
		"Accept-Encoding",
		"Accept-Language",
		"Accept-Ranges",
		"Cache-Control",
		"Cc",
		"Connection",
		"Content-Id",
		"Content-Language",
		"Content-Length",
		"Content-Transfer-Encoding",
		"Content-Type",
		"Cookie",
		"Date",
		"Dkim-Signature",
		"Etag",
		"Expires",
		"From",
		"Host",
		"If-Modified-Since",
		"If-None-Match",
		"In-Reply-To",
		"Last-Modified",
		"Location",
		"Message-Id",
		"Mime-Version",
		"Pragma",
		"Received",
		"Return-Path",
		"Server",
		"Set-Cookie",
		"Subject",
		"To",
		"User-Agent",
		"Via",
		"X-Forwarded-For",
		"X-Imforwards",
		"X-Powered-By",
	} {
		commonHeader[v] = v
	}
}