github.com/aloncn/graphics-go@v0.0.1/src/net/textproto/reader.go

github.com/aloncn/graphics-go@v0.0.1/src/net/textproto/reader.go (about)

     1  // Copyright 2010 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package textproto
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"io"
    11  	"io/ioutil"
    12  	"strconv"
    13  	"strings"
    14  )
    15  
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
//
// All methods read from R. Methods that read lines first finish any
// DotReader that is still in progress (see closeDot).
type Reader struct {
	// R is the buffered source every method reads from.
	R   *bufio.Reader
	// dot is the dotReader most recently returned by DotReader that has
	// not yet reached its end marker or an error; nil otherwise.
	dot *dotReader
	buf []byte // a re-usable buffer for readContinuedLineSlice
}
    23  
    24  // NewReader returns a new Reader reading from r.
    25  //
    26  // To avoid denial of service attacks, the provided bufio.Reader
    27  // should be reading from an io.LimitReader or similar Reader to bound
    28  // the size of responses.
    29  func NewReader(r *bufio.Reader) *Reader {
    30  	return &Reader{R: r}
    31  }
    32  
    33  // ReadLine reads a single line from r,
    34  // eliding the final \n or \r\n from the returned string.
    35  func (r *Reader) ReadLine() (string, error) {
    36  	line, err := r.readLineSlice()
    37  	return string(line), err
    38  }
    39  
    40  // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
    41  func (r *Reader) ReadLineBytes() ([]byte, error) {
    42  	line, err := r.readLineSlice()
    43  	if line != nil {
    44  		buf := make([]byte, len(line))
    45  		copy(buf, line)
    46  		line = buf
    47  	}
    48  	return line, err
    49  }
    50  
    51  func (r *Reader) readLineSlice() ([]byte, error) {
    52  	r.closeDot()
    53  	var line []byte
    54  	for {
    55  		l, more, err := r.R.ReadLine()
    56  		if err != nil {
    57  			return nil, err
    58  		}
    59  		// Avoid the copy if the first call produced a full line.
    60  		if line == nil && !more {
    61  			return l, nil
    62  		}
    63  		line = append(line, l...)
    64  		if !more {
    65  			break
    66  		}
    67  	}
    68  	return line, nil
    69  }
    70  
    71  // ReadContinuedLine reads a possibly continued line from r,
    72  // eliding the final trailing ASCII white space.
    73  // Lines after the first are considered continuations if they
    74  // begin with a space or tab character.  In the returned data,
    75  // continuation lines are separated from the previous line
    76  // only by a single space: the newline and leading white space
    77  // are removed.
    78  //
    79  // For example, consider this input:
    80  //
    81  //	Line 1
    82  //	  continued...
    83  //	Line 2
    84  //
    85  // The first call to ReadContinuedLine will return "Line 1 continued..."
    86  // and the second will return "Line 2".
    87  //
    88  // A line consisting of only white space is never continued.
    89  //
    90  func (r *Reader) ReadContinuedLine() (string, error) {
    91  	line, err := r.readContinuedLineSlice()
    92  	return string(line), err
    93  }
    94  
    95  // trim returns s with leading and trailing spaces and tabs removed.
    96  // It does not assume Unicode or UTF-8.
    97  func trim(s []byte) []byte {
    98  	i := 0
    99  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   100  		i++
   101  	}
   102  	n := len(s)
   103  	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
   104  		n--
   105  	}
   106  	return s[i:n]
   107  }
   108  
   109  // ReadContinuedLineBytes is like ReadContinuedLine but
   110  // returns a []byte instead of a string.
   111  func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
   112  	line, err := r.readContinuedLineSlice()
   113  	if line != nil {
   114  		buf := make([]byte, len(line))
   115  		copy(buf, line)
   116  		line = buf
   117  	}
   118  	return line, err
   119  }
   120  
   121  func (r *Reader) readContinuedLineSlice() ([]byte, error) {
   122  	// Read the first line.
   123  	line, err := r.readLineSlice()
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  	if len(line) == 0 { // blank line - no continuation
   128  		return line, nil
   129  	}
   130  
   131  	// Optimistically assume that we have started to buffer the next line
   132  	// and it starts with an ASCII letter (the next header key), so we can
   133  	// avoid copying that buffered data around in memory and skipping over
   134  	// non-existent whitespace.
   135  	if r.R.Buffered() > 1 {
   136  		peek, err := r.R.Peek(1)
   137  		if err == nil && isASCIILetter(peek[0]) {
   138  			return trim(line), nil
   139  		}
   140  	}
   141  
   142  	// ReadByte or the next readLineSlice will flush the read buffer;
   143  	// copy the slice into buf.
   144  	r.buf = append(r.buf[:0], trim(line)...)
   145  
   146  	// Read continuation lines.
   147  	for r.skipSpace() > 0 {
   148  		line, err := r.readLineSlice()
   149  		if err != nil {
   150  			break
   151  		}
   152  		r.buf = append(r.buf, ' ')
   153  		r.buf = append(r.buf, trim(line)...)
   154  	}
   155  	return r.buf, nil
   156  }
   157  
   158  // skipSpace skips R over all spaces and returns the number of bytes skipped.
   159  func (r *Reader) skipSpace() int {
   160  	n := 0
   161  	for {
   162  		c, err := r.R.ReadByte()
   163  		if err != nil {
   164  			// Bufio will keep err until next read.
   165  			break
   166  		}
   167  		if c != ' ' && c != '\t' {
   168  			r.R.UnreadByte()
   169  			break
   170  		}
   171  		n++
   172  	}
   173  	return n
   174  }
   175  
   176  func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
   177  	line, err := r.ReadLine()
   178  	if err != nil {
   179  		return
   180  	}
   181  	return parseCodeLine(line, expectCode)
   182  }
   183  
   184  func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
   185  	if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
   186  		err = ProtocolError("short response: " + line)
   187  		return
   188  	}
   189  	continued = line[3] == '-'
   190  	code, err = strconv.Atoi(line[0:3])
   191  	if err != nil || code < 100 {
   192  		err = ProtocolError("invalid response code: " + line)
   193  		return
   194  	}
   195  	message = line[4:]
   196  	if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
   197  		10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
   198  		100 <= expectCode && expectCode < 1000 && code != expectCode {
   199  		err = &Error{code, message}
   200  	}
   201  	return
   202  }
   203  
   204  // ReadCodeLine reads a response code line of the form
   205  //	code message
   206  // where code is a three-digit status code and the message
   207  // extends to the rest of the line.  An example of such a line is:
   208  //	220 plan9.bell-labs.com ESMTP
   209  //
   210  // If the prefix of the status does not match the digits in expectCode,
   211  // ReadCodeLine returns with err set to &Error{code, message}.
   212  // For example, if expectCode is 31, an error will be returned if
   213  // the status is not in the range [310,319].
   214  //
   215  // If the response is multi-line, ReadCodeLine returns an error.
   216  //
   217  // An expectCode <= 0 disables the check of the status code.
   218  //
   219  func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
   220  	code, continued, message, err := r.readCodeLine(expectCode)
   221  	if err == nil && continued {
   222  		err = ProtocolError("unexpected multi-line response: " + message)
   223  	}
   224  	return
   225  }
   226  
// ReadResponse reads a multi-line response of the form:
//
//	code-message line 1
//	code-message line 2
//	...
//	code message line n
//
// where code is a three-digit status code. The first line starts with the
// code and a hyphen. The response is terminated by a line that starts
// with the same code followed by a space. Each line in message is
// separated by a newline (\n).
//
// See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for
// details of another form of response accepted:
//
//	code-message line 1
//	message line 2
//	...
//	code message line n
//
// If the prefix of the status does not match the digits in expectCode,
// ReadResponse returns with err set to &Error{code, message}.
// For example, if expectCode is 31, an error will be returned if
// the status is not in the range [310,319].
//
// An expectCode <= 0 disables the check of the status code.
//
// If reading one of the following lines fails, ReadResponse returns
// 0, "" and that read error.
func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
	// The first line fixes the code and decides whether more lines follow.
	// Only this call checks expectCode; err keeps its result until the end.
	code, continued, message, err := r.readCodeLine(expectCode)
	multi := continued
	for continued {
		// This := declares a new err scoped to the loop body, so neither a
		// parse failure below nor this read touches the outer err.
		line, err := r.ReadLine()
		if err != nil {
			return 0, "", err
		}

		var code2 int
		var moreMessage string
		// expectCode 0: following lines are not checked against expectCode.
		code2, continued, moreMessage, err = parseCodeLine(line, 0)
		if err != nil || code2 != code {
			// Not a "code..." line for this response: take the whole line
			// as message text and keep reading.
			message += "\n" + strings.TrimRight(line, "\r\n")
			continued = true
			continue
		}
		message += "\n" + moreMessage
	}
	if err != nil && multi && message != "" {
		// replace one line error message with all lines (full message)
		err = &Error{code, message}
	}
	return
}
   279  
   280  // DotReader returns a new Reader that satisfies Reads using the
   281  // decoded text of a dot-encoded block read from r.
   282  // The returned Reader is only valid until the next call
   283  // to a method on r.
   284  //
   285  // Dot encoding is a common framing used for data blocks
   286  // in text protocols such as SMTP.  The data consists of a sequence
   287  // of lines, each of which ends in "\r\n".  The sequence itself
   288  // ends at a line containing just a dot: ".\r\n".  Lines beginning
   289  // with a dot are escaped with an additional dot to avoid
   290  // looking like the end of the sequence.
   291  //
   292  // The decoded form returned by the Reader's Read method
   293  // rewrites the "\r\n" line endings into the simpler "\n",
   294  // removes leading dot escapes if present, and stops with error io.EOF
   295  // after consuming (and discarding) the end-of-sequence line.
   296  func (r *Reader) DotReader() io.Reader {
   297  	r.closeDot()
   298  	r.dot = &dotReader{r: r}
   299  	return r.dot
   300  }
   301  
// dotReader is the io.Reader returned by Reader.DotReader. It decodes a
// dot-encoded block, pulling raw bytes from r.R.
type dotReader struct {
	// r is the Reader this dotReader was created from.
	r     *Reader
	// state is the decoder's position in its state machine (see Read).
	// The zero value is the beginning-of-line state.
	state int
}
   306  
// Read satisfies reads by decoding dot-encoded data read from d.r.
//
// It fills b with decoded bytes until b is full, the end marker has been
// consumed, or the underlying reader fails. An io.EOF from the underlying
// reader before the end marker is reported as io.ErrUnexpectedEOF.
// Once the end marker has been seen, Read returns io.EOF. On any error
// (including io.EOF) d detaches itself from d.r if it is still the
// Reader's current dot reader.
func (d *dotReader) Read(b []byte) (n int, err error) {
	// Run data through a simple state machine to
	// elide leading dots, rewrite trailing \r\n into \n,
	// and detect ending .\r\n line.
	const (
		stateBeginLine = iota // beginning of line; initial state; must be zero
		stateDot              // read . at beginning of line
		stateDotCR            // read .\r at beginning of line
		stateCR               // read \r (possibly at end of line)
		stateData             // reading data in middle of line
		stateEOF              // reached .\r\n end marker line
	)
	br := d.r.R
	for n < len(b) && d.state != stateEOF {
		var c byte
		c, err = br.ReadByte()
		if err != nil {
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			break
		}
		// Within the switch, "continue" swallows c (nothing is emitted),
		// while leaving the switch emits c into b below.
		switch d.state {
		case stateBeginLine:
			if c == '.' {
				d.state = stateDot
				continue
			}
			if c == '\r' {
				d.state = stateCR
				continue
			}
			// NOTE(review): a bare '\n' arriving here is emitted but leaves
			// the state at stateData rather than stateBeginLine, so a '.'
			// right after it is not treated as line-initial. Confirm
			// whether that is intended.
			d.state = stateData

		case stateDot:
			if c == '\r' {
				d.state = stateDotCR
				continue
			}
			if c == '\n' {
				// ".\n" is accepted as the end marker as well.
				d.state = stateEOF
				continue
			}
			// The leading dot was an escape; drop it and emit c.
			d.state = stateData

		case stateDotCR:
			if c == '\n' {
				d.state = stateEOF
				continue
			}
			// Not part of .\r\n.
			// Consume leading dot and emit saved \r.
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateCR:
			if c == '\n' {
				d.state = stateBeginLine
				// break leaves the switch only, so the '\n' is emitted
				// below and the preceding '\r' is dropped.
				break
			}
			// Not part of \r\n.  Emit saved \r
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateData:
			if c == '\r' {
				d.state = stateCR
				continue
			}
			if c == '\n' {
				d.state = stateBeginLine
			}
		}
		b[n] = c
		n++
	}
	if err == nil && d.state == stateEOF {
		err = io.EOF
	}
	// Detach from the Reader once finished so closeDot stops draining.
	if err != nil && d.r.dot == d {
		d.r.dot = nil
	}
	return
}
   394  
   395  // closeDot drains the current DotReader if any,
   396  // making sure that it reads until the ending dot line.
   397  func (r *Reader) closeDot() {
   398  	if r.dot == nil {
   399  		return
   400  	}
   401  	buf := make([]byte, 128)
   402  	for r.dot != nil {
   403  		// When Read reaches EOF or an error,
   404  		// it will set r.dot == nil.
   405  		r.dot.Read(buf)
   406  	}
   407  }
   408  
   409  // ReadDotBytes reads a dot-encoding and returns the decoded data.
   410  //
   411  // See the documentation for the DotReader method for details about dot-encoding.
   412  func (r *Reader) ReadDotBytes() ([]byte, error) {
   413  	return ioutil.ReadAll(r.DotReader())
   414  }
   415  
   416  // ReadDotLines reads a dot-encoding and returns a slice
   417  // containing the decoded lines, with the final \r\n or \n elided from each.
   418  //
   419  // See the documentation for the DotReader method for details about dot-encoding.
   420  func (r *Reader) ReadDotLines() ([]string, error) {
   421  	// We could use ReadDotBytes and then Split it,
   422  	// but reading a line at a time avoids needing a
   423  	// large contiguous block of memory and is simpler.
   424  	var v []string
   425  	var err error
   426  	for {
   427  		var line string
   428  		line, err = r.ReadLine()
   429  		if err != nil {
   430  			if err == io.EOF {
   431  				err = io.ErrUnexpectedEOF
   432  			}
   433  			break
   434  		}
   435  
   436  		// Dot by itself marks end; otherwise cut one dot.
   437  		if len(line) > 0 && line[0] == '.' {
   438  			if len(line) == 1 {
   439  				break
   440  			}
   441  			line = line[1:]
   442  		}
   443  		v = append(v, line)
   444  	}
   445  	return v, err
   446  }
   447  
   448  // ReadMIMEHeader reads a MIME-style header from r.
   449  // The header is a sequence of possibly continued Key: Value lines
   450  // ending in a blank line.
   451  // The returned map m maps CanonicalMIMEHeaderKey(key) to a
   452  // sequence of values in the same order encountered in the input.
   453  //
   454  // For example, consider this input:
   455  //
   456  //	My-Key: Value 1
   457  //	Long-Key: Even
   458  //	       Longer Value
   459  //	My-Key: Value 2
   460  //
   461  // Given that input, ReadMIMEHeader returns the map:
   462  //
   463  //	map[string][]string{
   464  //		"My-Key": {"Value 1", "Value 2"},
   465  //		"Long-Key": {"Even Longer Value"},
   466  //	}
   467  //
   468  func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
   469  	// Avoid lots of small slice allocations later by allocating one
   470  	// large one ahead of time which we'll cut up into smaller
   471  	// slices. If this isn't big enough later, we allocate small ones.
   472  	var strs []string
   473  	hint := r.upcomingHeaderNewlines()
   474  	if hint > 0 {
   475  		strs = make([]string, hint)
   476  	}
   477  
   478  	m := make(MIMEHeader, hint)
   479  	for {
   480  		kv, err := r.readContinuedLineSlice()
   481  		if len(kv) == 0 {
   482  			return m, err
   483  		}
   484  
   485  		// Key ends at first colon; should not have spaces but
   486  		// they appear in the wild, violating specs, so we
   487  		// remove them if present.
   488  		i := bytes.IndexByte(kv, ':')
   489  		if i < 0 {
   490  			return m, ProtocolError("malformed MIME header line: " + string(kv))
   491  		}
   492  		endKey := i
   493  		for endKey > 0 && kv[endKey-1] == ' ' {
   494  			endKey--
   495  		}
   496  		key := canonicalMIMEHeaderKey(kv[:endKey])
   497  
   498  		// As per RFC 7230 field-name is a token, tokens consist of one or more chars.
   499  		// We could return a ProtocolError here, but better to be liberal in what we
   500  		// accept, so if we get an empty key, skip it.
   501  		if key == "" {
   502  			continue
   503  		}
   504  
   505  		// Skip initial spaces in value.
   506  		i++ // skip colon
   507  		for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
   508  			i++
   509  		}
   510  		value := string(kv[i:])
   511  
   512  		vv := m[key]
   513  		if vv == nil && len(strs) > 0 {
   514  			// More than likely this will be a single-element key.
   515  			// Most headers aren't multi-valued.
   516  			// Set the capacity on strs[0] to 1, so any future append
   517  			// won't extend the slice into the other strings.
   518  			vv, strs = strs[:1:1], strs[1:]
   519  			vv[0] = value
   520  			m[key] = vv
   521  		} else {
   522  			m[key] = append(vv, value)
   523  		}
   524  
   525  		if err != nil {
   526  			return m, err
   527  		}
   528  	}
   529  }
   530  
   531  // upcomingHeaderNewlines returns an approximation of the number of newlines
   532  // that will be in this header. If it gets confused, it returns 0.
   533  func (r *Reader) upcomingHeaderNewlines() (n int) {
   534  	// Try to determine the 'hint' size.
   535  	r.R.Peek(1) // force a buffer load if empty
   536  	s := r.R.Buffered()
   537  	if s == 0 {
   538  		return
   539  	}
   540  	peek, _ := r.R.Peek(s)
   541  	for len(peek) > 0 {
   542  		i := bytes.IndexByte(peek, '\n')
   543  		if i < 3 {
   544  			// Not present (-1) or found within the next few bytes,
   545  			// implying we're at the end ("\r\n\r\n" or "\n\n")
   546  			return
   547  		}
   548  		n++
   549  		peek = peek[i+1:]
   550  	}
   551  	return
   552  }
   553  
   554  // CanonicalMIMEHeaderKey returns the canonical format of the
   555  // MIME header key s.  The canonicalization converts the first
   556  // letter and any letter following a hyphen to upper case;
   557  // the rest are converted to lowercase.  For example, the
   558  // canonical key for "accept-encoding" is "Accept-Encoding".
   559  // MIME header keys are assumed to be ASCII only.
   560  // If s contains a space or invalid header field bytes, it is
   561  // returned without modifications.
   562  func CanonicalMIMEHeaderKey(s string) string {
   563  	// Quick check for canonical encoding.
   564  	upper := true
   565  	for i := 0; i < len(s); i++ {
   566  		c := s[i]
   567  		if !validHeaderFieldByte(c) {
   568  			return s
   569  		}
   570  		if upper && 'a' <= c && c <= 'z' {
   571  			return canonicalMIMEHeaderKey([]byte(s))
   572  		}
   573  		if !upper && 'A' <= c && c <= 'Z' {
   574  			return canonicalMIMEHeaderKey([]byte(s))
   575  		}
   576  		upper = c == '-'
   577  	}
   578  	return s
   579  }
   580  
// toLower is the offset between an ASCII upper-case letter and its
// lower-case form: adding it to 'A'..'Z' gives 'a'..'z', and subtracting
// it from 'a'..'z' gives 'A'..'Z'.
const toLower = 'a' - 'A'
   582  
   583  // validHeaderFieldByte reports whether b is a valid byte in a header
   584  // field key. This is actually stricter than RFC 7230, which says:
   585  //   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
   586  //           "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
   587  //   token = 1*tchar
   588  // TODO: revisit in Go 1.6+ and possibly expand this. But note that many
   589  // servers have historically dropped '_' to prevent ambiguities when mapping
   590  // to CGI environment variables.
   591  func validHeaderFieldByte(b byte) bool {
   592  	return ('A' <= b && b <= 'Z') ||
   593  		('a' <= b && b <= 'z') ||
   594  		('0' <= b && b <= '9') ||
   595  		b == '-'
   596  }
   597  
   598  // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
   599  // allowed to mutate the provided byte slice before returning the
   600  // string.
   601  //
   602  // For invalid inputs (if a contains spaces or non-token bytes), a
   603  // is unchanged and a string copy is returned.
   604  func canonicalMIMEHeaderKey(a []byte) string {
   605  	// See if a looks like a header key. If not, return it unchanged.
   606  	for _, c := range a {
   607  		if validHeaderFieldByte(c) {
   608  			continue
   609  		}
   610  		// Don't canonicalize.
   611  		return string(a)
   612  	}
   613  
   614  	upper := true
   615  	for i, c := range a {
   616  		// Canonicalize: first letter upper case
   617  		// and upper case after each dash.
   618  		// (Host, User-Agent, If-Modified-Since).
   619  		// MIME headers are ASCII only, so no Unicode issues.
   620  		if upper && 'a' <= c && c <= 'z' {
   621  			c -= toLower
   622  		} else if !upper && 'A' <= c && c <= 'Z' {
   623  			c += toLower
   624  		}
   625  		a[i] = c
   626  		upper = c == '-' // for next time
   627  	}
   628  	// The compiler recognizes m[string(byteSlice)] as a special
   629  	// case, so a copy of a's bytes into a new string does not
   630  	// happen in this map lookup:
   631  	if v := commonHeader[string(a)]; v != "" {
   632  		return v
   633  	}
   634  	return string(a)
   635  }
   636  
   637  // commonHeader interns common header strings.
   638  var commonHeader = make(map[string]string)
   639  
   640  func init() {
   641  	for _, v := range []string{
   642  		"Accept",
   643  		"Accept-Charset",
   644  		"Accept-Encoding",
   645  		"Accept-Language",
   646  		"Accept-Ranges",
   647  		"Cache-Control",
   648  		"Cc",
   649  		"Connection",
   650  		"Content-Id",
   651  		"Content-Language",
   652  		"Content-Length",
   653  		"Content-Transfer-Encoding",
   654  		"Content-Type",
   655  		"Cookie",
   656  		"Date",
   657  		"Dkim-Signature",
   658  		"Etag",
   659  		"Expires",
   660  		"From",
   661  		"Host",
   662  		"If-Modified-Since",
   663  		"If-None-Match",
   664  		"In-Reply-To",
   665  		"Last-Modified",
   666  		"Location",
   667  		"Message-Id",
   668  		"Mime-Version",
   669  		"Pragma",
   670  		"Received",
   671  		"Return-Path",
   672  		"Server",
   673  		"Set-Cookie",
   674  		"Subject",
   675  		"To",
   676  		"User-Agent",
   677  		"Via",
   678  		"X-Forwarded-For",
   679  		"X-Imforwards",
   680  		"X-Powered-By",
   681  	} {
   682  		commonHeader[v] = v
   683  	}
   684  }