github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/mime/mediatype.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"strings"
    12  	"unicode"
    13  )
    14  
    15  // FormatMediaType serializes mediatype t and the parameters
    16  // param as a media type conforming to RFC 2045 and RFC 2616.
    17  // The type and parameter names are written in lower-case.
    18  // When any of the arguments result in a standard violation then
    19  // FormatMediaType returns the empty string.
    20  func FormatMediaType(t string, param map[string]string) string {
    21  	slash := strings.Index(t, "/")
    22  	if slash == -1 {
    23  		return ""
    24  	}
    25  	major, sub := t[:slash], t[slash+1:]
    26  	if !isToken(major) || !isToken(sub) {
    27  		return ""
    28  	}
    29  	var b bytes.Buffer
    30  	b.WriteString(strings.ToLower(major))
    31  	b.WriteByte('/')
    32  	b.WriteString(strings.ToLower(sub))
    33  
    34  	for attribute, value := range param {
    35  		b.WriteByte(';')
    36  		b.WriteByte(' ')
    37  		if !isToken(attribute) {
    38  			return ""
    39  		}
    40  		b.WriteString(strings.ToLower(attribute))
    41  		b.WriteByte('=')
    42  		if isToken(value) {
    43  			b.WriteString(value)
    44  			continue
    45  		}
    46  
    47  		b.WriteByte('"')
    48  		offset := 0
    49  		for index, character := range value {
    50  			if character == '"' || character == '\r' {
    51  				b.WriteString(value[offset:index])
    52  				offset = index
    53  				b.WriteByte('\\')
    54  			}
    55  			if character&0x80 != 0 {
    56  				return ""
    57  			}
    58  		}
    59  		b.WriteString(value[offset:])
    60  		b.WriteByte('"')
    61  	}
    62  	return b.String()
    63  }
    64  
    65  func checkMediaTypeDisposition(s string) error {
    66  	typ, rest := consumeToken(s)
    67  	if typ == "" {
    68  		return errors.New("mime: no media type")
    69  	}
    70  	if rest == "" {
    71  		return nil
    72  	}
    73  	if !strings.HasPrefix(rest, "/") {
    74  		return errors.New("mime: expected slash after first token")
    75  	}
    76  	subtype, rest := consumeToken(rest[1:])
    77  	if subtype == "" {
    78  		return errors.New("mime: expected token after slash")
    79  	}
    80  	if rest != "" {
    81  		return errors.New("mime: unexpected content after media subtype")
    82  	}
    83  	return nil
    84  }
    85  
    86  // ParseMediaType parses a media type value and any optional
    87  // parameters, per RFC 1521.  Media types are the values in
    88  // Content-Type and Content-Disposition headers (RFC 2183).
    89  // On success, ParseMediaType returns the media type converted
    90  // to lowercase and trimmed of white space and a non-nil map.
    91  // The returned map, params, maps from the lowercase
    92  // attribute to the attribute value with its case preserved.
    93  func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
    94  	i := strings.Index(v, ";")
    95  	if i == -1 {
    96  		i = len(v)
    97  	}
    98  	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
    99  
   100  	err = checkMediaTypeDisposition(mediatype)
   101  	if err != nil {
   102  		return "", nil, err
   103  	}
   104  
   105  	params = make(map[string]string)
   106  
   107  	// Map of base parameter name -> parameter name -> value
   108  	// for parameters containing a '*' character.
   109  	// Lazily initialized.
   110  	var continuation map[string]map[string]string
   111  
   112  	v = v[i:]
   113  	for len(v) > 0 {
   114  		v = strings.TrimLeftFunc(v, unicode.IsSpace)
   115  		if len(v) == 0 {
   116  			break
   117  		}
   118  		key, value, rest := consumeMediaParam(v)
   119  		if key == "" {
   120  			if strings.TrimSpace(rest) == ";" {
   121  				// Ignore trailing semicolons.
   122  				// Not an error.
   123  				return
   124  			}
   125  			// Parse error.
   126  			return "", nil, errors.New("mime: invalid media parameter")
   127  		}
   128  
   129  		pmap := params
   130  		if idx := strings.Index(key, "*"); idx != -1 {
   131  			baseName := key[:idx]
   132  			if continuation == nil {
   133  				continuation = make(map[string]map[string]string)
   134  			}
   135  			var ok bool
   136  			if pmap, ok = continuation[baseName]; !ok {
   137  				continuation[baseName] = make(map[string]string)
   138  				pmap = continuation[baseName]
   139  			}
   140  		}
   141  		if _, exists := pmap[key]; exists {
   142  			// Duplicate parameter name is bogus.
   143  			return "", nil, errors.New("mime: duplicate parameter name")
   144  		}
   145  		pmap[key] = value
   146  		v = rest
   147  	}
   148  
   149  	// Stitch together any continuations or things with stars
   150  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   151  	var buf bytes.Buffer
   152  	for key, pieceMap := range continuation {
   153  		singlePartKey := key + "*"
   154  		if v, ok := pieceMap[singlePartKey]; ok {
   155  			decv := decode2231Enc(v)
   156  			params[key] = decv
   157  			continue
   158  		}
   159  
   160  		buf.Reset()
   161  		valid := false
   162  		for n := 0; ; n++ {
   163  			simplePart := fmt.Sprintf("%s*%d", key, n)
   164  			if v, ok := pieceMap[simplePart]; ok {
   165  				valid = true
   166  				buf.WriteString(v)
   167  				continue
   168  			}
   169  			encodedPart := simplePart + "*"
   170  			if v, ok := pieceMap[encodedPart]; ok {
   171  				valid = true
   172  				if n == 0 {
   173  					buf.WriteString(decode2231Enc(v))
   174  				} else {
   175  					decv, _ := percentHexUnescape(v)
   176  					buf.WriteString(decv)
   177  				}
   178  			} else {
   179  				break
   180  			}
   181  		}
   182  		if valid {
   183  			params[key] = buf.String()
   184  		}
   185  	}
   186  
   187  	return
   188  }
   189  
   190  func decode2231Enc(v string) string {
   191  	sv := strings.SplitN(v, "'", 3)
   192  	if len(sv) != 3 {
   193  		return ""
   194  	}
   195  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   196  	// need to decide how to expose it in the API. But I'm not sure
   197  	// anybody uses it in practice.
   198  	charset := strings.ToLower(sv[0])
   199  	if charset != "us-ascii" && charset != "utf-8" {
   200  		// TODO: unsupported encoding
   201  		return ""
   202  	}
   203  	encv, _ := percentHexUnescape(sv[2])
   204  	return encv
   205  }
   206  
   207  func isNotTokenChar(r rune) bool {
   208  	return !isTokenChar(r)
   209  }
   210  
   211  // consumeToken consumes a token from the beginning of provided
   212  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
   213  // the token consumed and the rest of the string.  Returns ("", v) on
   214  // failure to consume at least one character.
   215  func consumeToken(v string) (token, rest string) {
   216  	notPos := strings.IndexFunc(v, isNotTokenChar)
   217  	if notPos == -1 {
   218  		return v, ""
   219  	}
   220  	if notPos == 0 {
   221  		return "", v
   222  	}
   223  	return v[0:notPos], v[notPos:]
   224  }
   225  
   226  // consumeValue consumes a "value" per RFC 2045, where a value is
   227  // either a 'token' or a 'quoted-string'.  On success, consumeValue
   228  // returns the value consumed (and de-quoted/escaped, if a
   229  // quoted-string) and the rest of the string.  On failure, returns
   230  // ("", v).
   231  func consumeValue(v string) (value, rest string) {
   232  	if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) {
   233  		return consumeToken(v)
   234  	}
   235  
   236  	leadQuote := rune(v[0])
   237  
   238  	// parse a quoted-string
   239  	rest = v[1:] // consume the leading quote
   240  	buffer := new(bytes.Buffer)
   241  	var idx int
   242  	var r rune
   243  	var nextIsLiteral bool
   244  	for idx, r = range rest {
   245  		switch {
   246  		case nextIsLiteral:
   247  			buffer.WriteRune(r)
   248  			nextIsLiteral = false
   249  		case r == leadQuote:
   250  			return buffer.String(), rest[idx+1:]
   251  		case r == '\\':
   252  			nextIsLiteral = true
   253  		case r != '\r' && r != '\n':
   254  			buffer.WriteRune(r)
   255  		default:
   256  			return "", v
   257  		}
   258  	}
   259  	return "", v
   260  }
   261  
   262  func consumeMediaParam(v string) (param, value, rest string) {
   263  	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   264  	if !strings.HasPrefix(rest, ";") {
   265  		return "", "", v
   266  	}
   267  
   268  	rest = rest[1:] // consume semicolon
   269  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   270  	param, rest = consumeToken(rest)
   271  	param = strings.ToLower(param)
   272  	if param == "" {
   273  		return "", "", v
   274  	}
   275  
   276  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   277  	if !strings.HasPrefix(rest, "=") {
   278  		return "", "", v
   279  	}
   280  	rest = rest[1:] // consume equals sign
   281  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   282  	value, rest = consumeValue(rest)
   283  	if value == "" {
   284  		return "", "", v
   285  	}
   286  	return param, value, rest
   287  }
   288  
   289  func percentHexUnescape(s string) (string, error) {
   290  	// Count %, check that they're well-formed.
   291  	percents := 0
   292  	for i := 0; i < len(s); {
   293  		if s[i] != '%' {
   294  			i++
   295  			continue
   296  		}
   297  		percents++
   298  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   299  			s = s[i:]
   300  			if len(s) > 3 {
   301  				s = s[0:3]
   302  			}
   303  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   304  		}
   305  		i += 3
   306  	}
   307  	if percents == 0 {
   308  		return s, nil
   309  	}
   310  
   311  	t := make([]byte, len(s)-2*percents)
   312  	j := 0
   313  	for i := 0; i < len(s); {
   314  		switch s[i] {
   315  		case '%':
   316  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   317  			j++
   318  			i += 3
   319  		default:
   320  			t[j] = s[i]
   321  			j++
   322  			i++
   323  		}
   324  	}
   325  	return string(t), nil
   326  }
   327  
   328  func ishex(c byte) bool {
   329  	switch {
   330  	case '0' <= c && c <= '9':
   331  		return true
   332  	case 'a' <= c && c <= 'f':
   333  		return true
   334  	case 'A' <= c && c <= 'F':
   335  		return true
   336  	}
   337  	return false
   338  }
   339  
   340  func unhex(c byte) byte {
   341  	switch {
   342  	case '0' <= c && c <= '9':
   343  		return c - '0'
   344  	case 'a' <= c && c <= 'f':
   345  		return c - 'a' + 10
   346  	case 'A' <= c && c <= 'F':
   347  		return c - 'A' + 10
   348  	}
   349  	return 0
   350  }