github.com/ActiveState/go@v0.0.0-20170614201249-0b81c023a722/src/mime/mediatype.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package mime 6 7 import ( 8 "bytes" 9 "errors" 10 "fmt" 11 "sort" 12 "strings" 13 "unicode" 14 ) 15 16 // FormatMediaType serializes mediatype t and the parameters 17 // param as a media type conforming to RFC 2045 and RFC 2616. 18 // The type and parameter names are written in lower-case. 19 // When any of the arguments result in a standard violation then 20 // FormatMediaType returns the empty string. 21 func FormatMediaType(t string, param map[string]string) string { 22 var b bytes.Buffer 23 if slash := strings.Index(t, "/"); slash == -1 { 24 if !isToken(t) { 25 return "" 26 } 27 b.WriteString(strings.ToLower(t)) 28 } else { 29 major, sub := t[:slash], t[slash+1:] 30 if !isToken(major) || !isToken(sub) { 31 return "" 32 } 33 b.WriteString(strings.ToLower(major)) 34 b.WriteByte('/') 35 b.WriteString(strings.ToLower(sub)) 36 } 37 38 attrs := make([]string, 0, len(param)) 39 for a := range param { 40 attrs = append(attrs, a) 41 } 42 sort.Strings(attrs) 43 44 for _, attribute := range attrs { 45 value := param[attribute] 46 b.WriteByte(';') 47 b.WriteByte(' ') 48 if !isToken(attribute) { 49 return "" 50 } 51 b.WriteString(strings.ToLower(attribute)) 52 b.WriteByte('=') 53 if isToken(value) { 54 b.WriteString(value) 55 continue 56 } 57 58 b.WriteByte('"') 59 offset := 0 60 for index, character := range value { 61 if character == '"' || character == '\\' { 62 b.WriteString(value[offset:index]) 63 offset = index 64 b.WriteByte('\\') 65 } 66 if character&0x80 != 0 { 67 return "" 68 } 69 } 70 b.WriteString(value[offset:]) 71 b.WriteByte('"') 72 } 73 return b.String() 74 } 75 76 func checkMediaTypeDisposition(s string) error { 77 typ, rest := consumeToken(s) 78 if typ == "" { 79 return errors.New("mime: no media type") 80 } 81 if rest == "" { 82 return nil 83 } 84 if !strings.HasPrefix(rest, "/") { 85 return errors.New("mime: expected slash after first token") 86 } 87 subtype, rest := consumeToken(rest[1:]) 88 if subtype == "" { 89 return errors.New("mime: expected token after slash") 90 } 91 if rest != "" { 92 return errors.New("mime: unexpected content after media subtype") 93 } 94 return nil 95 } 96 97 // ErrInvalidMediaParameter is returned by ParseMediaType if 98 // the media type value was found but there was an error parsing 99 // the optional parameters 100 var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter") 101 102 // ParseMediaType parses a media type value and any optional 103 // parameters, per RFC 1521. Media types are the values in 104 // Content-Type and Content-Disposition headers (RFC 2183). 105 // On success, ParseMediaType returns the media type converted 106 // to lowercase and trimmed of white space and a non-nil map. 107 // If there is an error parsing the optional parameter, 108 // the media type will be returned along with the error 109 // ErrInvalidMediaParameter. 110 // The returned map, params, maps from the lowercase 111 // attribute to the attribute value with its case preserved. 112 func ParseMediaType(v string) (mediatype string, params map[string]string, err error) { 113 i := strings.Index(v, ";") 114 if i == -1 { 115 i = len(v) 116 } 117 mediatype = strings.TrimSpace(strings.ToLower(v[0:i])) 118 119 err = checkMediaTypeDisposition(mediatype) 120 if err != nil { 121 return "", nil, err 122 } 123 124 params = make(map[string]string) 125 126 // Map of base parameter name -> parameter name -> value 127 // for parameters containing a '*' character. 128 // Lazily initialized. 129 var continuation map[string]map[string]string 130 131 v = v[i:] 132 for len(v) > 0 { 133 v = strings.TrimLeftFunc(v, unicode.IsSpace) 134 if len(v) == 0 { 135 break 136 } 137 key, value, rest := consumeMediaParam(v) 138 if key == "" { 139 if strings.TrimSpace(rest) == ";" { 140 // Ignore trailing semicolons. 141 // Not an error. 142 return 143 } 144 // Parse error. 145 return mediatype, nil, ErrInvalidMediaParameter 146 } 147 148 pmap := params 149 if idx := strings.Index(key, "*"); idx != -1 { 150 baseName := key[:idx] 151 if continuation == nil { 152 continuation = make(map[string]map[string]string) 153 } 154 var ok bool 155 if pmap, ok = continuation[baseName]; !ok { 156 continuation[baseName] = make(map[string]string) 157 pmap = continuation[baseName] 158 } 159 } 160 if _, exists := pmap[key]; exists { 161 // Duplicate parameter name is bogus. 162 return "", nil, errors.New("mime: duplicate parameter name") 163 } 164 pmap[key] = value 165 v = rest 166 } 167 168 // Stitch together any continuations or things with stars 169 // (i.e. RFC 2231 things with stars: "foo*0" or "foo*") 170 var buf bytes.Buffer 171 for key, pieceMap := range continuation { 172 singlePartKey := key + "*" 173 if v, ok := pieceMap[singlePartKey]; ok { 174 decv := decode2231Enc(v) 175 params[key] = decv 176 continue 177 } 178 179 buf.Reset() 180 valid := false 181 for n := 0; ; n++ { 182 simplePart := fmt.Sprintf("%s*%d", key, n) 183 if v, ok := pieceMap[simplePart]; ok { 184 valid = true 185 buf.WriteString(v) 186 continue 187 } 188 encodedPart := simplePart + "*" 189 if v, ok := pieceMap[encodedPart]; ok { 190 valid = true 191 if n == 0 { 192 buf.WriteString(decode2231Enc(v)) 193 } else { 194 decv, _ := percentHexUnescape(v) 195 buf.WriteString(decv) 196 } 197 } else { 198 break 199 } 200 } 201 if valid { 202 params[key] = buf.String() 203 } 204 } 205 206 return 207 } 208 209 func decode2231Enc(v string) string { 210 sv := strings.SplitN(v, "'", 3) 211 if len(sv) != 3 { 212 return "" 213 } 214 // TODO: ignoring lang in sv[1] for now. If anybody needs it we'll 215 // need to decide how to expose it in the API. But I'm not sure 216 // anybody uses it in practice. 217 charset := strings.ToLower(sv[0]) 218 if charset != "us-ascii" && charset != "utf-8" { 219 // TODO: unsupported encoding 220 return "" 221 } 222 encv, _ := percentHexUnescape(sv[2]) 223 return encv 224 } 225 226 func isNotTokenChar(r rune) bool { 227 return !isTokenChar(r) 228 } 229 230 // consumeToken consumes a token from the beginning of provided 231 // string, per RFC 2045 section 5.1 (referenced from 2183), and return 232 // the token consumed and the rest of the string. Returns ("", v) on 233 // failure to consume at least one character. 234 func consumeToken(v string) (token, rest string) { 235 notPos := strings.IndexFunc(v, isNotTokenChar) 236 if notPos == -1 { 237 return v, "" 238 } 239 if notPos == 0 { 240 return "", v 241 } 242 return v[0:notPos], v[notPos:] 243 } 244 245 // consumeValue consumes a "value" per RFC 2045, where a value is 246 // either a 'token' or a 'quoted-string'. On success, consumeValue 247 // returns the value consumed (and de-quoted/escaped, if a 248 // quoted-string) and the rest of the string. On failure, returns 249 // ("", v). 250 func consumeValue(v string) (value, rest string) { 251 if v == "" { 252 return 253 } 254 if v[0] != '"' { 255 return consumeToken(v) 256 } 257 258 // parse a quoted-string 259 buffer := new(bytes.Buffer) 260 for i := 1; i < len(v); i++ { 261 r := v[i] 262 if r == '"' { 263 return buffer.String(), v[i+1:] 264 } 265 // When MSIE sends a full file path (in "intranet mode"), it does not 266 // escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt". 267 // 268 // No known MIME generators emit unnecessary backslash escapes 269 // for simple token characters like numbers and letters. 270 // 271 // If we see an unnecessary backslash escape, assume it is from MSIE 272 // and intended as a literal backslash. This makes Go servers deal better 273 // with MSIE without affecting the way they handle conforming MIME 274 // generators. 275 if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) { 276 buffer.WriteByte(v[i+1]) 277 i++ 278 continue 279 } 280 if r == '\r' || r == '\n' { 281 return "", v 282 } 283 buffer.WriteByte(v[i]) 284 } 285 // Did not find end quote. 286 return "", v 287 } 288 289 func consumeMediaParam(v string) (param, value, rest string) { 290 rest = strings.TrimLeftFunc(v, unicode.IsSpace) 291 if !strings.HasPrefix(rest, ";") { 292 return "", "", v 293 } 294 295 rest = rest[1:] // consume semicolon 296 rest = strings.TrimLeftFunc(rest, unicode.IsSpace) 297 param, rest = consumeToken(rest) 298 param = strings.ToLower(param) 299 if param == "" { 300 return "", "", v 301 } 302 303 rest = strings.TrimLeftFunc(rest, unicode.IsSpace) 304 if !strings.HasPrefix(rest, "=") { 305 return "", "", v 306 } 307 rest = rest[1:] // consume equals sign 308 rest = strings.TrimLeftFunc(rest, unicode.IsSpace) 309 value, rest2 := consumeValue(rest) 310 if value == "" && rest2 == rest { 311 return "", "", v 312 } 313 rest = rest2 314 return param, value, rest 315 } 316 317 func percentHexUnescape(s string) (string, error) { 318 // Count %, check that they're well-formed. 319 percents := 0 320 for i := 0; i < len(s); { 321 if s[i] != '%' { 322 i++ 323 continue 324 } 325 percents++ 326 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { 327 s = s[i:] 328 if len(s) > 3 { 329 s = s[0:3] 330 } 331 return "", fmt.Errorf("mime: bogus characters after %%: %q", s) 332 } 333 i += 3 334 } 335 if percents == 0 { 336 return s, nil 337 } 338 339 t := make([]byte, len(s)-2*percents) 340 j := 0 341 for i := 0; i < len(s); { 342 switch s[i] { 343 case '%': 344 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) 345 j++ 346 i += 3 347 default: 348 t[j] = s[i] 349 j++ 350 i++ 351 } 352 } 353 return string(t), nil 354 } 355 356 func ishex(c byte) bool { 357 switch { 358 case '0' <= c && c <= '9': 359 return true 360 case 'a' <= c && c <= 'f': 361 return true 362 case 'A' <= c && c <= 'F': 363 return true 364 } 365 return false 366 } 367 368 func unhex(c byte) byte { 369 switch { 370 case '0' <= c && c <= '9': 371 return c - '0' 372 case 'a' <= c && c <= 'f': 373 return c - 'a' + 10 374 case 'A' <= c && c <= 'F': 375 return c - 'A' + 10 376 } 377 return 0 378 }