github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/html/template/url.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strings"
    11  )
    12  
    13  // urlFilter returns its input unless it contains an unsafe scheme in which
    14  // case it defangs the entire URL.
    15  //
    16  // Schemes that cause unintended side effects that are irreversible without user
    17  // interaction are considered unsafe. For example, clicking on a "javascript:"
    18  // link can immediately trigger JavaScript code execution.
    19  //
    20  // This filter conservatively assumes that all schemes other than the following
    21  // are unsafe:
    22  //    * http:   Navigates to a new website, and may open a new window or tab.
    23  //              These side effects can be reversed by navigating back to the
    24  //              previous website, or closing the window or tab. No irreversible
    25  //              changes will take place without further user interaction with
    26  //              the new website.
    27  //    * https:  Same as http.
    28  //    * mailto: Opens an email program and starts a new draft. This side effect
    29  //              is not irreversible until the user explicitly clicks send; it
    30  //              can be undone by closing the email program.
    31  //
    32  // To allow URLs containing other schemes to bypass this filter, developers must
    33  // explicitly indicate that such a URL is expected and safe by encapsulating it
    34  // in a template.URL value.
    35  func urlFilter(args ...interface{}) string {
    36  	s, t := stringify(args...)
    37  	if t == contentTypeURL {
    38  		return s
    39  	}
    40  	if i := strings.IndexRune(s, ':'); i >= 0 && !strings.ContainsRune(s[:i], '/') {
    41  		protocol := strings.ToLower(s[:i])
    42  		if protocol != "http" && protocol != "https" && protocol != "mailto" {
    43  			return "#" + filterFailsafe
    44  		}
    45  	}
    46  	return s
    47  }
    48  
    49  // urlEscaper produces an output that can be embedded in a URL query.
    50  // The output can be embedded in an HTML attribute without further escaping.
    51  func urlEscaper(args ...interface{}) string {
    52  	return urlProcessor(false, args...)
    53  }
    54  
    55  // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
    56  // string or parenthesis delimited url(...).
    57  // The normalizer does not encode all HTML specials. Specifically, it does not
    58  // encode '&' so correct embedding in an HTML attribute requires escaping of
    59  // '&' to '&'.
    60  func urlNormalizer(args ...interface{}) string {
    61  	return urlProcessor(true, args...)
    62  }
    63  
    64  // urlProcessor normalizes (when norm is true) or escapes its input to produce
    65  // a valid hierarchical or opaque URL part.
    66  func urlProcessor(norm bool, args ...interface{}) string {
    67  	s, t := stringify(args...)
    68  	if t == contentTypeURL {
    69  		norm = true
    70  	}
    71  	var b bytes.Buffer
    72  	written := 0
    73  	// The byte loop below assumes that all URLs use UTF-8 as the
    74  	// content-encoding. This is similar to the URI to IRI encoding scheme
    75  	// defined in section 3.1 of  RFC 3987, and behaves the same as the
    76  	// EcmaScript builtin encodeURIComponent.
    77  	// It should not cause any misencoding of URLs in pages with
    78  	// Content-type: text/html;charset=UTF-8.
    79  	for i, n := 0, len(s); i < n; i++ {
    80  		c := s[i]
    81  		switch c {
    82  		// Single quote and parens are sub-delims in RFC 3986, but we
    83  		// escape them so the output can be embedded in single
    84  		// quoted attributes and unquoted CSS url(...) constructs.
    85  		// Single quotes are reserved in URLs, but are only used in
    86  		// the obsolete "mark" rule in an appendix in RFC 3986
    87  		// so can be safely encoded.
    88  		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
    89  			if norm {
    90  				continue
    91  			}
    92  		// Unreserved according to RFC 3986 sec 2.3
    93  		// "For consistency, percent-encoded octets in the ranges of
    94  		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
    95  		// period (%2E), underscore (%5F), or tilde (%7E) should not be
    96  		// created by URI producers
    97  		case '-', '.', '_', '~':
    98  			continue
    99  		case '%':
   100  			// When normalizing do not re-encode valid escapes.
   101  			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
   102  				continue
   103  			}
   104  		default:
   105  			// Unreserved according to RFC 3986 sec 2.3
   106  			if 'a' <= c && c <= 'z' {
   107  				continue
   108  			}
   109  			if 'A' <= c && c <= 'Z' {
   110  				continue
   111  			}
   112  			if '0' <= c && c <= '9' {
   113  				continue
   114  			}
   115  		}
   116  		b.WriteString(s[written:i])
   117  		fmt.Fprintf(&b, "%%%02x", c)
   118  		written = i + 1
   119  	}
   120  	if written == 0 {
   121  		return s
   122  	}
   123  	b.WriteString(s[written:])
   124  	return b.String()
   125  }