github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/mime/mediatype.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"sort"
    12  	"strings"
    13  	"unicode"
    14  )
    15  
    16  // FormatMediaType serializes mediatype t and the parameters
    17  // param as a media type conforming to RFC 2045 and RFC 2616.
    18  // The type and parameter names are written in lower-case.
    19  // When any of the arguments result in a standard violation then
    20  // FormatMediaType returns the empty string.
    21  func FormatMediaType(t string, param map[string]string) string {
    22  	slash := strings.Index(t, "/")
    23  	if slash == -1 {
    24  		return ""
    25  	}
    26  	major, sub := t[:slash], t[slash+1:]
    27  	if !isToken(major) || !isToken(sub) {
    28  		return ""
    29  	}
    30  	var b bytes.Buffer
    31  	b.WriteString(strings.ToLower(major))
    32  	b.WriteByte('/')
    33  	b.WriteString(strings.ToLower(sub))
    34  
    35  	attrs := make([]string, 0, len(param))
    36  	for a := range param {
    37  		attrs = append(attrs, a)
    38  	}
    39  	sort.Strings(attrs)
    40  
    41  	for _, attribute := range attrs {
    42  		value := param[attribute]
    43  		b.WriteByte(';')
    44  		b.WriteByte(' ')
    45  		if !isToken(attribute) {
    46  			return ""
    47  		}
    48  		b.WriteString(strings.ToLower(attribute))
    49  		b.WriteByte('=')
    50  		if isToken(value) {
    51  			b.WriteString(value)
    52  			continue
    53  		}
    54  
    55  		b.WriteByte('"')
    56  		offset := 0
    57  		for index, character := range value {
    58  			if character == '"' || character == '\\' {
    59  				b.WriteString(value[offset:index])
    60  				offset = index
    61  				b.WriteByte('\\')
    62  			}
    63  			if character&0x80 != 0 {
    64  				return ""
    65  			}
    66  		}
    67  		b.WriteString(value[offset:])
    68  		b.WriteByte('"')
    69  	}
    70  	return b.String()
    71  }
    72  
    73  func checkMediaTypeDisposition(s string) error {
    74  	typ, rest := consumeToken(s)
    75  	if typ == "" {
    76  		return errors.New("mime: no media type")
    77  	}
    78  	if rest == "" {
    79  		return nil
    80  	}
    81  	if !strings.HasPrefix(rest, "/") {
    82  		return errors.New("mime: expected slash after first token")
    83  	}
    84  	subtype, rest := consumeToken(rest[1:])
    85  	if subtype == "" {
    86  		return errors.New("mime: expected token after slash")
    87  	}
    88  	if rest != "" {
    89  		return errors.New("mime: unexpected content after media subtype")
    90  	}
    91  	return nil
    92  }
    93  
    94  // ParseMediaType parses a media type value and any optional
    95  // parameters, per RFC 1521.  Media types are the values in
    96  // Content-Type and Content-Disposition headers (RFC 2183).
    97  // On success, ParseMediaType returns the media type converted
    98  // to lowercase and trimmed of white space and a non-nil map.
    99  // The returned map, params, maps from the lowercase
   100  // attribute to the attribute value with its case preserved.
   101  func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
   102  	i := strings.Index(v, ";")
   103  	if i == -1 {
   104  		i = len(v)
   105  	}
   106  	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
   107  
   108  	err = checkMediaTypeDisposition(mediatype)
   109  	if err != nil {
   110  		return "", nil, err
   111  	}
   112  
   113  	params = make(map[string]string)
   114  
   115  	// Map of base parameter name -> parameter name -> value
   116  	// for parameters containing a '*' character.
   117  	// Lazily initialized.
   118  	var continuation map[string]map[string]string
   119  
   120  	v = v[i:]
   121  	for len(v) > 0 {
   122  		v = strings.TrimLeftFunc(v, unicode.IsSpace)
   123  		if len(v) == 0 {
   124  			break
   125  		}
   126  		key, value, rest := consumeMediaParam(v)
   127  		if key == "" {
   128  			if strings.TrimSpace(rest) == ";" {
   129  				// Ignore trailing semicolons.
   130  				// Not an error.
   131  				return
   132  			}
   133  			// Parse error.
   134  			return "", nil, errors.New("mime: invalid media parameter")
   135  		}
   136  
   137  		pmap := params
   138  		if idx := strings.Index(key, "*"); idx != -1 {
   139  			baseName := key[:idx]
   140  			if continuation == nil {
   141  				continuation = make(map[string]map[string]string)
   142  			}
   143  			var ok bool
   144  			if pmap, ok = continuation[baseName]; !ok {
   145  				continuation[baseName] = make(map[string]string)
   146  				pmap = continuation[baseName]
   147  			}
   148  		}
   149  		if _, exists := pmap[key]; exists {
   150  			// Duplicate parameter name is bogus.
   151  			return "", nil, errors.New("mime: duplicate parameter name")
   152  		}
   153  		pmap[key] = value
   154  		v = rest
   155  	}
   156  
   157  	// Stitch together any continuations or things with stars
   158  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   159  	var buf bytes.Buffer
   160  	for key, pieceMap := range continuation {
   161  		singlePartKey := key + "*"
   162  		if v, ok := pieceMap[singlePartKey]; ok {
   163  			decv := decode2231Enc(v)
   164  			params[key] = decv
   165  			continue
   166  		}
   167  
   168  		buf.Reset()
   169  		valid := false
   170  		for n := 0; ; n++ {
   171  			simplePart := fmt.Sprintf("%s*%d", key, n)
   172  			if v, ok := pieceMap[simplePart]; ok {
   173  				valid = true
   174  				buf.WriteString(v)
   175  				continue
   176  			}
   177  			encodedPart := simplePart + "*"
   178  			if v, ok := pieceMap[encodedPart]; ok {
   179  				valid = true
   180  				if n == 0 {
   181  					buf.WriteString(decode2231Enc(v))
   182  				} else {
   183  					decv, _ := percentHexUnescape(v)
   184  					buf.WriteString(decv)
   185  				}
   186  			} else {
   187  				break
   188  			}
   189  		}
   190  		if valid {
   191  			params[key] = buf.String()
   192  		}
   193  	}
   194  
   195  	return
   196  }
   197  
   198  func decode2231Enc(v string) string {
   199  	sv := strings.SplitN(v, "'", 3)
   200  	if len(sv) != 3 {
   201  		return ""
   202  	}
   203  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   204  	// need to decide how to expose it in the API. But I'm not sure
   205  	// anybody uses it in practice.
   206  	charset := strings.ToLower(sv[0])
   207  	if charset != "us-ascii" && charset != "utf-8" {
   208  		// TODO: unsupported encoding
   209  		return ""
   210  	}
   211  	encv, _ := percentHexUnescape(sv[2])
   212  	return encv
   213  }
   214  
   215  func isNotTokenChar(r rune) bool {
   216  	return !isTokenChar(r)
   217  }
   218  
   219  // consumeToken consumes a token from the beginning of provided
   220  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
   221  // the token consumed and the rest of the string.  Returns ("", v) on
   222  // failure to consume at least one character.
   223  func consumeToken(v string) (token, rest string) {
   224  	notPos := strings.IndexFunc(v, isNotTokenChar)
   225  	if notPos == -1 {
   226  		return v, ""
   227  	}
   228  	if notPos == 0 {
   229  		return "", v
   230  	}
   231  	return v[0:notPos], v[notPos:]
   232  }
   233  
   234  // consumeValue consumes a "value" per RFC 2045, where a value is
   235  // either a 'token' or a 'quoted-string'.  On success, consumeValue
   236  // returns the value consumed (and de-quoted/escaped, if a
   237  // quoted-string) and the rest of the string.  On failure, returns
   238  // ("", v).
   239  func consumeValue(v string) (value, rest string) {
   240  	if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) {
   241  		return consumeToken(v)
   242  	}
   243  
   244  	leadQuote := rune(v[0])
   245  
   246  	// parse a quoted-string
   247  	rest = v[1:] // consume the leading quote
   248  	buffer := new(bytes.Buffer)
   249  	var idx int
   250  	var r rune
   251  	var nextIsLiteral bool
   252  	for idx, r = range rest {
   253  		switch {
   254  		case nextIsLiteral:
   255  			buffer.WriteRune(r)
   256  			nextIsLiteral = false
   257  		case r == leadQuote:
   258  			return buffer.String(), rest[idx+1:]
   259  		case r == '\\':
   260  			nextIsLiteral = true
   261  		case r != '\r' && r != '\n':
   262  			buffer.WriteRune(r)
   263  		default:
   264  			return "", v
   265  		}
   266  	}
   267  	return "", v
   268  }
   269  
   270  func consumeMediaParam(v string) (param, value, rest string) {
   271  	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   272  	if !strings.HasPrefix(rest, ";") {
   273  		return "", "", v
   274  	}
   275  
   276  	rest = rest[1:] // consume semicolon
   277  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   278  	param, rest = consumeToken(rest)
   279  	param = strings.ToLower(param)
   280  	if param == "" {
   281  		return "", "", v
   282  	}
   283  
   284  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   285  	if !strings.HasPrefix(rest, "=") {
   286  		return "", "", v
   287  	}
   288  	rest = rest[1:] // consume equals sign
   289  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   290  	value, rest = consumeValue(rest)
   291  	if value == "" {
   292  		return "", "", v
   293  	}
   294  	return param, value, rest
   295  }
   296  
   297  func percentHexUnescape(s string) (string, error) {
   298  	// Count %, check that they're well-formed.
   299  	percents := 0
   300  	for i := 0; i < len(s); {
   301  		if s[i] != '%' {
   302  			i++
   303  			continue
   304  		}
   305  		percents++
   306  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   307  			s = s[i:]
   308  			if len(s) > 3 {
   309  				s = s[0:3]
   310  			}
   311  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   312  		}
   313  		i += 3
   314  	}
   315  	if percents == 0 {
   316  		return s, nil
   317  	}
   318  
   319  	t := make([]byte, len(s)-2*percents)
   320  	j := 0
   321  	for i := 0; i < len(s); {
   322  		switch s[i] {
   323  		case '%':
   324  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   325  			j++
   326  			i += 3
   327  		default:
   328  			t[j] = s[i]
   329  			j++
   330  			i++
   331  		}
   332  	}
   333  	return string(t), nil
   334  }
   335  
   336  func ishex(c byte) bool {
   337  	switch {
   338  	case '0' <= c && c <= '9':
   339  		return true
   340  	case 'a' <= c && c <= 'f':
   341  		return true
   342  	case 'A' <= c && c <= 'F':
   343  		return true
   344  	}
   345  	return false
   346  }
   347  
   348  func unhex(c byte) byte {
   349  	switch {
   350  	case '0' <= c && c <= '9':
   351  		return c - '0'
   352  	case 'a' <= c && c <= 'f':
   353  		return c - 'a' + 10
   354  	case 'A' <= c && c <= 'F':
   355  		return c - 'A' + 10
   356  	}
   357  	return 0
   358  }