github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/html/template/url.go

github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/html/template/url.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strings"
    11  )
    12  
    13  // urlFilter returns its input unless it contains an unsafe scheme in which
    14  // case it defangs the entire URL.
    15  //
    16  // Schemes that cause unintended side effects that are irreversible without user
    17  // interaction are considered unsafe. For example, clicking on a "javascript:"
    18  // link can immediately trigger JavaScript code execution.
    19  //
    20  // This filter conservatively assumes that all schemes other than the following
    21  // are unsafe:
    22  //    * http:   Navigates to a new website, and may open a new window or tab.
    23  //              These side effects can be reversed by navigating back to the
    24  //              previous website, or closing the window or tab. No irreversible
    25  //              changes will take place without further user interaction with
    26  //              the new website.
    27  //    * https:  Same as http.
    28  //    * mailto: Opens an email program and starts a new draft. This side effect
    29  //              is not irreversible until the user explicitly clicks send; it
    30  //              can be undone by closing the email program.
    31  //
    32  // To allow URLs containing other schemes to bypass this filter, developers must
    33  // explicitly indicate that such a URL is expected and safe by encapsulating it
    34  // in a template.URL value.
    35  func urlFilter(args ...interface{}) string {
    36  	s, t := stringify(args...)
    37  	if t == contentTypeURL {
    38  		return s
    39  	}
    40  	if !isSafeURL(s) {
    41  		return "#" + filterFailsafe
    42  	}
    43  	return s
    44  }
    45  
    46  // isSafeURL is true if s is a relative URL or if URL has a protocol in
    47  // (http, https, mailto).
    48  func isSafeURL(s string) bool {
    49  	if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') {
    50  
    51  		protocol := s[:i]
    52  		if !strings.EqualFold(protocol, "http") && !strings.EqualFold(protocol, "https") && !strings.EqualFold(protocol, "mailto") {
    53  			return false
    54  		}
    55  	}
    56  	return true
    57  }
    58  
    59  // urlEscaper produces an output that can be embedded in a URL query.
    60  // The output can be embedded in an HTML attribute without further escaping.
    61  func urlEscaper(args ...interface{}) string {
    62  	return urlProcessor(false, args...)
    63  }
    64  
    65  // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
    66  // string or parenthesis delimited url(...).
    67  // The normalizer does not encode all HTML specials. Specifically, it does not
    68  // encode '&' so correct embedding in an HTML attribute requires escaping of
    69  // '&' to '&amp;'.
    70  func urlNormalizer(args ...interface{}) string {
    71  	return urlProcessor(true, args...)
    72  }
    73  
    74  // urlProcessor normalizes (when norm is true) or escapes its input to produce
    75  // a valid hierarchical or opaque URL part.
    76  func urlProcessor(norm bool, args ...interface{}) string {
    77  	s, t := stringify(args...)
    78  	if t == contentTypeURL {
    79  		norm = true
    80  	}
    81  	var b bytes.Buffer
    82  	if processURLOnto(s, norm, &b) {
    83  		return b.String()
    84  	}
    85  	return s
    86  }
    87  
    88  // processURLOnto appends a normalized URL corresponding to its input to b
    89  // and reports whether the appended content differs from s.
    90  func processURLOnto(s string, norm bool, b *bytes.Buffer) bool {
    91  	b.Grow(len(s) + 16)
    92  	written := 0
    93  	// The byte loop below assumes that all URLs use UTF-8 as the
    94  	// content-encoding. This is similar to the URI to IRI encoding scheme
    95  	// defined in section 3.1 of  RFC 3987, and behaves the same as the
    96  	// EcmaScript builtin encodeURIComponent.
    97  	// It should not cause any misencoding of URLs in pages with
    98  	// Content-type: text/html;charset=UTF-8.
    99  	for i, n := 0, len(s); i < n; i++ {
   100  		c := s[i]
   101  		switch c {
   102  		// Single quote and parens are sub-delims in RFC 3986, but we
   103  		// escape them so the output can be embedded in single
   104  		// quoted attributes and unquoted CSS url(...) constructs.
   105  		// Single quotes are reserved in URLs, but are only used in
   106  		// the obsolete "mark" rule in an appendix in RFC 3986
   107  		// so can be safely encoded.
   108  		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
   109  			if norm {
   110  				continue
   111  			}
   112  		// Unreserved according to RFC 3986 sec 2.3
   113  		// "For consistency, percent-encoded octets in the ranges of
   114  		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
   115  		// period (%2E), underscore (%5F), or tilde (%7E) should not be
   116  		// created by URI producers
   117  		case '-', '.', '_', '~':
   118  			continue
   119  		case '%':
   120  			// When normalizing do not re-encode valid escapes.
   121  			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
   122  				continue
   123  			}
   124  		default:
   125  			// Unreserved according to RFC 3986 sec 2.3
   126  			if 'a' <= c && c <= 'z' {
   127  				continue
   128  			}
   129  			if 'A' <= c && c <= 'Z' {
   130  				continue
   131  			}
   132  			if '0' <= c && c <= '9' {
   133  				continue
   134  			}
   135  		}
   136  		b.WriteString(s[written:i])
   137  		fmt.Fprintf(b, "%%%02x", c)
   138  		written = i + 1
   139  	}
   140  	b.WriteString(s[written:])
   141  	return written != 0
   142  }
   143  
   144  // Filters and normalizes srcset values which are comma separated
   145  // URLs followed by metadata.
   146  func srcsetFilterAndEscaper(args ...interface{}) string {
   147  	s, t := stringify(args...)
   148  	switch t {
   149  	case contentTypeSrcset:
   150  		return s
   151  	case contentTypeURL:
   152  		// Normalizing gets rid of all HTML whitespace
   153  		// which separate the image URL from its metadata.
   154  		var b bytes.Buffer
   155  		if processURLOnto(s, true, &b) {
   156  			s = b.String()
   157  		}
   158  		// Additionally, commas separate one source from another.
   159  		return strings.ReplaceAll(s, ",", "%2c")
   160  	}
   161  
   162  	var b bytes.Buffer
   163  	written := 0
   164  	for i := 0; i < len(s); i++ {
   165  		if s[i] == ',' {
   166  			filterSrcsetElement(s, written, i, &b)
   167  			b.WriteString(",")
   168  			written = i + 1
   169  		}
   170  	}
   171  	filterSrcsetElement(s, written, len(s), &b)
   172  	return b.String()
   173  }
   174  
   175  // Derived from https://play.golang.org/p/Dhmj7FORT5
   176  const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"
   177  
   178  // isHTMLSpace is true iff c is a whitespace character per
   179  // https://infra.spec.whatwg.org/#ascii-whitespace
   180  func isHTMLSpace(c byte) bool {
   181  	return (c <= 0x20) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
   182  }
   183  
   184  func isHTMLSpaceOrASCIIAlnum(c byte) bool {
   185  	return (c < 0x80) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)))
   186  }
   187  
   188  func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) {
   189  	start := left
   190  	for start < right && isHTMLSpace(s[start]) {
   191  		start++
   192  	}
   193  	end := right
   194  	for i := start; i < right; i++ {
   195  		if isHTMLSpace(s[i]) {
   196  			end = i
   197  			break
   198  		}
   199  	}
   200  	if url := s[start:end]; isSafeURL(url) {
   201  		// If image metadata is only spaces or alnums then
   202  		// we don't need to URL normalize it.
   203  		metadataOk := true
   204  		for i := end; i < right; i++ {
   205  			if !isHTMLSpaceOrASCIIAlnum(s[i]) {
   206  				metadataOk = false
   207  				break
   208  			}
   209  		}
   210  		if metadataOk {
   211  			b.WriteString(s[left:start])
   212  			processURLOnto(url, true, b)
   213  			b.WriteString(s[end:right])
   214  			return
   215  		}
   216  	}
   217  	b.WriteString("#")
   218  	b.WriteString(filterFailsafe)
   219  }