github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/html/template/url.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "fmt" 10 "strings" 11 ) 12 13 // urlFilter returns its input unless it contains an unsafe scheme in which 14 // case it defangs the entire URL. 15 // 16 // Schemes that cause unintended side effects that are irreversible without user 17 // interaction are considered unsafe. For example, clicking on a "javascript:" 18 // link can immediately trigger JavaScript code execution. 19 // 20 // This filter conservatively assumes that all schemes other than the following 21 // are unsafe: 22 // * http: Navigates to a new website, and may open a new window or tab. 23 // These side effects can be reversed by navigating back to the 24 // previous website, or closing the window or tab. No irreversible 25 // changes will take place without further user interaction with 26 // the new website. 27 // * https: Same as http. 28 // * mailto: Opens an email program and starts a new draft. This side effect 29 // is not irreversible until the user explicitly clicks send; it 30 // can be undone by closing the email program. 31 // 32 // To allow URLs containing other schemes to bypass this filter, developers must 33 // explicitly indicate that such a URL is expected and safe by encapsulating it 34 // in a template.URL value. 35 func urlFilter(args ...interface{}) string { 36 s, t := stringify(args...) 37 if t == contentTypeURL { 38 return s 39 } 40 if !isSafeURL(s) { 41 return "#" + filterFailsafe 42 } 43 return s 44 } 45 46 // isSafeURL is true if s is a relative URL or if URL has a protocol in 47 // (http, https, mailto). 48 func isSafeURL(s string) bool { 49 if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') { 50 51 protocol := s[:i] 52 if !strings.EqualFold(protocol, "http") && !strings.EqualFold(protocol, "https") && !strings.EqualFold(protocol, "mailto") { 53 return false 54 } 55 } 56 return true 57 } 58 59 // urlEscaper produces an output that can be embedded in a URL query. 60 // The output can be embedded in an HTML attribute without further escaping. 61 func urlEscaper(args ...interface{}) string { 62 return urlProcessor(false, args...) 63 } 64 65 // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited 66 // string or parenthesis delimited url(...). 67 // The normalizer does not encode all HTML specials. Specifically, it does not 68 // encode '&' so correct embedding in an HTML attribute requires escaping of 69 // '&' to '&'. 70 func urlNormalizer(args ...interface{}) string { 71 return urlProcessor(true, args...) 72 } 73 74 // urlProcessor normalizes (when norm is true) or escapes its input to produce 75 // a valid hierarchical or opaque URL part. 76 func urlProcessor(norm bool, args ...interface{}) string { 77 s, t := stringify(args...) 78 if t == contentTypeURL { 79 norm = true 80 } 81 var b bytes.Buffer 82 if processURLOnto(s, norm, &b) { 83 return b.String() 84 } 85 return s 86 } 87 88 // processURLOnto appends a normalized URL corresponding to its input to b 89 // and reports whether the appended content differs from s. 90 func processURLOnto(s string, norm bool, b *bytes.Buffer) bool { 91 b.Grow(len(s) + 16) 92 written := 0 93 // The byte loop below assumes that all URLs use UTF-8 as the 94 // content-encoding. This is similar to the URI to IRI encoding scheme 95 // defined in section 3.1 of RFC 3987, and behaves the same as the 96 // EcmaScript builtin encodeURIComponent. 97 // It should not cause any misencoding of URLs in pages with 98 // Content-type: text/html;charset=UTF-8. 99 for i, n := 0, len(s); i < n; i++ { 100 c := s[i] 101 switch c { 102 // Single quote and parens are sub-delims in RFC 3986, but we 103 // escape them so the output can be embedded in single 104 // quoted attributes and unquoted CSS url(...) constructs. 105 // Single quotes are reserved in URLs, but are only used in 106 // the obsolete "mark" rule in an appendix in RFC 3986 107 // so can be safely encoded. 108 case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']': 109 if norm { 110 continue 111 } 112 // Unreserved according to RFC 3986 sec 2.3 113 // "For consistency, percent-encoded octets in the ranges of 114 // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), 115 // period (%2E), underscore (%5F), or tilde (%7E) should not be 116 // created by URI producers 117 case '-', '.', '_', '~': 118 continue 119 case '%': 120 // When normalizing do not re-encode valid escapes. 121 if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) { 122 continue 123 } 124 default: 125 // Unreserved according to RFC 3986 sec 2.3 126 if 'a' <= c && c <= 'z' { 127 continue 128 } 129 if 'A' <= c && c <= 'Z' { 130 continue 131 } 132 if '0' <= c && c <= '9' { 133 continue 134 } 135 } 136 b.WriteString(s[written:i]) 137 fmt.Fprintf(b, "%%%02x", c) 138 written = i + 1 139 } 140 b.WriteString(s[written:]) 141 return written != 0 142 } 143 144 // Filters and normalizes srcset values which are comma separated 145 // URLs followed by metadata. 146 func srcsetFilterAndEscaper(args ...interface{}) string { 147 s, t := stringify(args...) 148 switch t { 149 case contentTypeSrcset: 150 return s 151 case contentTypeURL: 152 // Normalizing gets rid of all HTML whitespace 153 // which separate the image URL from its metadata. 154 var b bytes.Buffer 155 if processURLOnto(s, true, &b) { 156 s = b.String() 157 } 158 // Additionally, commas separate one source from another. 159 return strings.ReplaceAll(s, ",", "%2c") 160 } 161 162 var b bytes.Buffer 163 written := 0 164 for i := 0; i < len(s); i++ { 165 if s[i] == ',' { 166 filterSrcsetElement(s, written, i, &b) 167 b.WriteString(",") 168 written = i + 1 169 } 170 } 171 filterSrcsetElement(s, written, len(s), &b) 172 return b.String() 173 } 174 175 // Derived from https://play.golang.org/p/Dhmj7FORT5 176 const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07" 177 178 // isHTMLSpace is true iff c is a whitespace character per 179 // https://infra.spec.whatwg.org/#ascii-whitespace 180 func isHTMLSpace(c byte) bool { 181 return (c <= 0x20) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7))) 182 } 183 184 func isHTMLSpaceOrASCIIAlnum(c byte) bool { 185 return (c < 0x80) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7))) 186 } 187 188 func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) { 189 start := left 190 for start < right && isHTMLSpace(s[start]) { 191 start++ 192 } 193 end := right 194 for i := start; i < right; i++ { 195 if isHTMLSpace(s[i]) { 196 end = i 197 break 198 } 199 } 200 if url := s[start:end]; isSafeURL(url) { 201 // If image metadata is only spaces or alnums then 202 // we don't need to URL normalize it. 203 metadataOk := true 204 for i := end; i < right; i++ { 205 if !isHTMLSpaceOrASCIIAlnum(s[i]) { 206 metadataOk = false 207 break 208 } 209 } 210 if metadataOk { 211 b.WriteString(s[left:start]) 212 processURLOnto(url, true, b) 213 b.WriteString(s[end:right]) 214 return 215 } 216 } 217 b.WriteString("#") 218 b.WriteString(filterFailsafe) 219 }