github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/bluemonday/helpers.go (about)

     1  // Copyright (c) 2014, David Kitchen <david@buro9.com>
     2  //
     3  // All rights reserved.
     4  //
     5  // Redistribution and use in source and binary forms, with or without
     6  // modification, are permitted provided that the following conditions are met:
     7  //
     8  // * Redistributions of source code must retain the above copyright notice, this
     9  //   list of conditions and the following disclaimer.
    10  //
    11  // * Redistributions in binary form must reproduce the above copyright notice,
    12  //   this list of conditions and the following disclaimer in the documentation
    13  //   and/or other materials provided with the distribution.
    14  //
    15  // * Neither the name of the organisation (Microcosm) nor the names of its
    16  //   contributors may be used to endorse or promote products derived from
    17  //   this software without specific prior written permission.
    18  //
    19  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    20  // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    21  // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    22  // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    23  // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    24  // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    25  // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    26  // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    27  // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    28  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29  
    30  package bluemonday
    31  
    32  import (
    33  	"encoding/base64"
    34  	"net/url"
    35  	"regexp"
    36  )
    37  
    38  // A selection of regular expressions that can be used as .Matching() rules on
    39  // HTML attributes.
    40  var (
    41  	// CellAlign handles the `align` attribute
    42  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
    43  	CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)
    44  
    45  	// CellVerticalAlign handles the `valign` attribute
    46  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
    47  	CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)
    48  
    49  	// Direction handles the `dir` attribute
    50  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
    51  	Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)
    52  
    53  	// ImageAlign handles the `align` attribute on the `image` tag
    54  	// http://www.w3.org/MarkUp/Test/Img/imgtest.html
    55  	ImageAlign = regexp.MustCompile(
    56  		`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
    57  	)
    58  
    59  	// Integer describes whole positive integers (including 0) used in places
    60  	// like td.colspan
    61  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
    62  	Integer = regexp.MustCompile(`^[0-9]+$`)
    63  
    64  	// ISO8601 according to the W3 group is only a subset of the ISO8601
    65  	// standard: http://www.w3.org/TR/NOTE-datetime
    66  	//
    67  	// Used in places like time.datetime
    68  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
    69  	//
    70  	// Matches patterns:
    71  	//  Year:
    72  	//     YYYY (eg 1997)
    73  	//  Year and month:
    74  	//     YYYY-MM (eg 1997-07)
    75  	//  Complete date:
    76  	//     YYYY-MM-DD (eg 1997-07-16)
    77  	//  Complete date plus hours and minutes:
    78  	//     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
    79  	//  Complete date plus hours, minutes and seconds:
    80  	//     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
    81  	//  Complete date plus hours, minutes, seconds and a decimal fraction of a
    82  	//  second
    83  	//      YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
    84  	ISO8601 = regexp.MustCompile(
    85  		`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +
    86  			`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
    87  	)
    88  
    89  	// ListType encapsulates the common value as well as the latest spec
    90  	// values for lists
    91  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
    92  	ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)
    93  
    94  	// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
    95  	// `class` which both contain space delimited lists of data tokens
    96  	// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
    97  	// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
    98  	SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)
    99  
   100  	// Number is a double value used on HTML5 meter and progress elements
   101  	// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
   102  	Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)
   103  
   104  	// NumberOrPercent is used predominantly as units of measurement in width
   105  	// and height attributes
   106  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
   107  	NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)
   108  
   109  	// Paragraph of text in an attribute such as *.'title', img.alt, etc
   110  	// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
   111  	// Note that we are not allowing chars that could close tags like '>'
   112  	Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)
   113  
   114  	// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
   115  	// prefix of data URIs that contain common web image formats.
   116  	//
   117  	// This is not exported as it's not useful by itself, and only has value
   118  	// within the AllowDataURIImages func
   119  	dataURIImagePrefix = regexp.MustCompile(
   120  		`^image/(gif|jpeg|png|webp);base64,`,
   121  	)
   122  )
   123  
// AllowStandardURLs is a convenience function that will enable rel="nofollow"
// on "a", "area" and "link" (if you have allowed those elements) and will
// ensure that the URL values are parseable and either relative or belong to the
// "mailto", "http", or "https" schemes.
//
// It takes no arguments and returns nothing; its whole effect is the four
// configuration calls it makes on the receiver policy.
func (p *Policy) AllowStandardURLs() {
	// URLs must be parseable by net/url.Parse()
	p.RequireParseableURLs(true)

	// Relative URLs (those for which url.IsAbs() is false) are permitted
	p.AllowRelativeURLs(true)

	// Most common URL schemes only
	p.AllowURLSchemes("mailto", "http", "https")

	// For all anchors we will add rel="nofollow" if it does not already exist
	// This applies to "a" "area" "link"
	p.RequireNoFollowOnLinks(true)
}
   142  
   143  // AllowStandardAttributes will enable "id", "title" and the language specific
   144  // attributes "dir" and "lang" on all elements that are whitelisted
   145  func (p *Policy) AllowStandardAttributes() {
   146  	// "dir" "lang" are permitted as both language attributes affect charsets
   147  	// and direction of text.
   148  	p.AllowAttrs("dir").Matching(Direction).Globally()
   149  	p.AllowAttrs(
   150  		"lang",
   151  	).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally()
   152  
   153  	// "id" is permitted. This is pretty much as some HTML elements require this
   154  	// to work well ("dfn" is an example of a "id" being value)
   155  	// This does create a risk that JavaScript and CSS within your web page
   156  	// might identify the wrong elements. Ensure that you select things
   157  	// accurately
   158  	p.AllowAttrs("id").Matching(
   159  		regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`),
   160  	).Globally()
   161  
   162  	// "title" is permitted as it improves accessibility.
   163  	p.AllowAttrs("title").Matching(Paragraph).Globally()
   164  }
   165  
   166  // AllowStyling presently enables the class attribute globally.
   167  //
   168  // Note: When bluemonday ships a CSS parser and we can safely sanitise that,
   169  // this will also allow sanitized styling of elements via the style attribute.
   170  func (p *Policy) AllowStyling() {
   171  
   172  	// "class" is permitted globally
   173  	p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally()
   174  }
   175  
   176  // AllowImages enables the img element and some popular attributes. It will also
   177  // ensure that URL values are parseable. This helper does not enable data URI
   178  // images, for that you should also use the AllowDataURIImages() helper.
   179  func (p *Policy) AllowImages() {
   180  
   181  	// "img" is permitted
   182  	p.AllowAttrs("align").Matching(ImageAlign).OnElements("img")
   183  	p.AllowAttrs("alt").Matching(Paragraph).OnElements("img")
   184  	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img")
   185  
   186  	// Standard URLs enabled
   187  	p.AllowStandardURLs()
   188  	p.AllowAttrs("src").OnElements("img")
   189  }
   190  
   191  // AllowDataURIImages permits the use of inline images defined in RFC2397
   192  // http://tools.ietf.org/html/rfc2397
   193  // http://en.wikipedia.org/wiki/Data_URI_scheme
   194  //
   195  // Images must have a mimetype matching:
   196  //   image/gif
   197  //   image/jpeg
   198  //   image/png
   199  //   image/webp
   200  //
   201  // NOTE: There is a potential security risk to allowing data URIs and you should
   202  // only permit them on content you already trust.
   203  // http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/
   204  // https://capec.mitre.org/data/definitions/244.html
   205  func (p *Policy) AllowDataURIImages() {
   206  
   207  	// URLs must be parseable by net/url.Parse()
   208  	p.RequireParseableURLs(true)
   209  
   210  	// Supply a function to validate images contained within data URI
   211  	p.AllowURLSchemeWithCustomPolicy(
   212  		"data",
   213  		func(url *url.URL) (allowUrl bool) {
   214  			if url.RawQuery != "" || url.Fragment != "" {
   215  				return false
   216  			}
   217  
   218  			matched := dataURIImagePrefix.FindString(url.Opaque)
   219  			if matched == "" {
   220  				return false
   221  			}
   222  
   223  			_, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):])
   224  			if err != nil {
   225  				return false
   226  			}
   227  
   228  			return true
   229  		},
   230  	)
   231  }
   232  
   233  // AllowLists will enabled ordered and unordered lists, as well as definition
   234  // lists
   235  func (p *Policy) AllowLists() {
   236  	// "ol" "ul" are permitted
   237  	p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul")
   238  
   239  	// "li" is permitted
   240  	p.AllowAttrs("type").Matching(ListType).OnElements("li")
   241  	p.AllowAttrs("value").Matching(Integer).OnElements("li")
   242  
   243  	// "dl" "dt" "dd" are permitted
   244  	p.AllowElements("dl", "dt", "dd")
   245  }
   246  
   247  // AllowTables will enable a rich set of elements and attributes to describe
   248  // HTML tables
   249  func (p *Policy) AllowTables() {
   250  
   251  	// "table" is permitted
   252  	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table")
   253  	p.AllowAttrs("summary").Matching(Paragraph).OnElements("table")
   254  
   255  	// "caption" is permitted
   256  	p.AllowElements("caption")
   257  
   258  	// "col" "colgroup" are permitted
   259  	p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup")
   260  	p.AllowAttrs("height", "width").Matching(
   261  		NumberOrPercent,
   262  	).OnElements("col", "colgroup")
   263  	p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col")
   264  	p.AllowAttrs("valign").Matching(
   265  		CellVerticalAlign,
   266  	).OnElements("col", "colgroup")
   267  
   268  	// "thead" "tr" are permitted
   269  	p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr")
   270  	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr")
   271  
   272  	// "td" "th" are permitted
   273  	p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th")
   274  	p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th")
   275  	p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th")
   276  	p.AllowAttrs("headers").Matching(
   277  		SpaceSeparatedTokens,
   278  	).OnElements("td", "th")
   279  	p.AllowAttrs("height", "width").Matching(
   280  		NumberOrPercent,
   281  	).OnElements("td", "th")
   282  	p.AllowAttrs(
   283  		"scope",
   284  	).Matching(
   285  		regexp.MustCompile(`(?i)(?:row|col)(?:group)?`),
   286  	).OnElements("td", "th")
   287  	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th")
   288  	p.AllowAttrs("nowrap").Matching(
   289  		regexp.MustCompile(`(?i)|nowrap`),
   290  	).OnElements("td", "th")
   291  
   292  	// "tbody" "tfoot"
   293  	p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot")
   294  	p.AllowAttrs("valign").Matching(
   295  		CellVerticalAlign,
   296  	).OnElements("tbody", "tfoot")
   297  }