github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/bluemonday/policy.go (about) 1 // Copyright (c) 2014, David Kitchen <david@buro9.com> 2 // 3 // All rights reserved. 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are met: 7 // 8 // * Redistributions of source code must retain the above copyright notice, this 9 // list of conditions and the following disclaimer. 10 // 11 // * Redistributions in binary form must reproduce the above copyright notice, 12 // this list of conditions and the following disclaimer in the documentation 13 // and/or other materials provided with the distribution. 14 // 15 // * Neither the name of the organisation (Microcosm) nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 package bluemonday 31 32 import ( 33 "net/url" 34 "regexp" 35 "strings" 36 ) 37 38 // Policy encapsulates the whitelist of HTML elements and attributes that will 39 // be applied to the sanitised HTML. 40 // 41 // You should use bluemonday.NewPolicy() to create a blank policy as the 42 // unexported fields contain maps that need to be initialized. 43 type Policy struct { 44 45 // Declares whether the maps have been initialized, used as a cheap check to 46 // ensure that those using Policy{} directly won't cause nil pointer 47 // exceptions 48 initialized bool 49 50 // Allows the <!DOCTYPE > tag to exist in the sanitized document 51 allowDocType bool 52 53 // When true, add rel="nofollow" to HTML anchors 54 requireNoFollow bool 55 56 // When true, add rel="nofollow" to HTML anchors 57 // Will add for href="http://foo" 58 // Will skip for href="/foo" or href="foo" 59 requireNoFollowFullyQualifiedLinks bool 60 61 // When true add target="_blank" to fully qualified links 62 // Will add for href="http://foo" 63 // Will skip for href="/foo" or href="foo" 64 addTargetBlankToFullyQualifiedLinks bool 65 66 // When true, URLs must be parseable by "net/url" url.Parse() 67 requireParseableURLs bool 68 69 // When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted 70 allowRelativeURLs bool 71 72 // map[htmlElementName]map[htmlAttributeName]attrPolicy 73 elsAndAttrs map[string]map[string]attrPolicy 74 75 // map[htmlAttributeName]attrPolicy 76 globalAttrs map[string]attrPolicy 77 78 // If urlPolicy is nil, all URLs with matching schema are allowed. 79 // Otherwise, only the URLs with matching schema and urlPolicy(url) 80 // returning true are allowed. 81 allowURLSchemes map[string]urlPolicy 82 83 // If an element has had all attributes removed as a result of a policy 84 // being applied, then the element would be removed from the output. 85 // 86 // However some elements are valid and have strong layout meaning without 87 // any attributes, i.e. <table>. To prevent those being removed we maintain 88 // a list of elements that are allowed to have no attributes and that will 89 // be maintained in the output HTML. 90 setOfElementsAllowedWithoutAttrs map[string]struct{} 91 92 setOfElementsToSkipContent map[string]struct{} 93 } 94 95 type attrPolicy struct { 96 97 // optional pattern to match, when not nil the regexp needs to match 98 // otherwise the attribute is removed 99 regexp *regexp.Regexp 100 } 101 102 type attrPolicyBuilder struct { 103 p *Policy 104 105 attrNames []string 106 regexp *regexp.Regexp 107 allowEmpty bool 108 } 109 110 type urlPolicy func(url *url.URL) (allowUrl bool) 111 112 // init initializes the maps if this has not been done already 113 func (p *Policy) init() { 114 if !p.initialized { 115 p.elsAndAttrs = make(map[string]map[string]attrPolicy) 116 p.globalAttrs = make(map[string]attrPolicy) 117 p.allowURLSchemes = make(map[string]urlPolicy) 118 p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{}) 119 p.setOfElementsToSkipContent = make(map[string]struct{}) 120 p.initialized = true 121 } 122 } 123 124 // NewPolicy returns a blank policy with nothing whitelisted or permitted. This 125 // is the recommended way to start building a policy and you should now use 126 // AllowAttrs() and/or AllowElements() to construct the whitelist of HTML 127 // elements and attributes. 128 func NewPolicy() *Policy { 129 130 p := Policy{} 131 132 p.addDefaultElementsWithoutAttrs() 133 p.addDefaultSkipElementContent() 134 135 return &p 136 } 137 138 // AllowAttrs takes a range of HTML attribute names and returns an 139 // attribute policy builder that allows you to specify the pattern and scope of 140 // the whitelisted attribute. 141 // 142 // The attribute policy is only added to the core policy when either Globally() 143 // or OnElements(...) are called. 144 func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder { 145 146 p.init() 147 148 abp := attrPolicyBuilder{ 149 p: p, 150 allowEmpty: false, 151 } 152 153 for _, attrName := range attrNames { 154 abp.attrNames = append(abp.attrNames, strings.ToLower(attrName)) 155 } 156 157 return &abp 158 } 159 160 // AllowNoAttrs says that attributes on element are optional. 161 // 162 // The attribute policy is only added to the core policy when OnElements(...) 163 // are called. 164 func (p *Policy) AllowNoAttrs() *attrPolicyBuilder { 165 166 p.init() 167 168 abp := attrPolicyBuilder{ 169 p: p, 170 allowEmpty: true, 171 } 172 return &abp 173 } 174 175 // AllowNoAttrs says that attributes on element are optional. 176 // 177 // The attribute policy is only added to the core policy when OnElements(...) 178 // are called. 179 func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder { 180 181 abp.allowEmpty = true 182 183 return abp 184 } 185 186 // Matching allows a regular expression to be applied to a nascent attribute 187 // policy, and returns the attribute policy. Calling this more than once will 188 // replace the existing regexp. 189 func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder { 190 191 abp.regexp = regex 192 193 return abp 194 } 195 196 // OnElements will bind an attribute policy to a given range of HTML elements 197 // and return the updated policy 198 func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy { 199 200 for _, element := range elements { 201 element = strings.ToLower(element) 202 203 for _, attr := range abp.attrNames { 204 205 if _, ok := abp.p.elsAndAttrs[element]; !ok { 206 abp.p.elsAndAttrs[element] = make(map[string]attrPolicy) 207 } 208 209 ap := attrPolicy{} 210 if abp.regexp != nil { 211 ap.regexp = abp.regexp 212 } 213 214 abp.p.elsAndAttrs[element][attr] = ap 215 } 216 217 if abp.allowEmpty { 218 abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{} 219 220 if _, ok := abp.p.elsAndAttrs[element]; !ok { 221 abp.p.elsAndAttrs[element] = make(map[string]attrPolicy) 222 } 223 } 224 } 225 226 return abp.p 227 } 228 229 // Globally will bind an attribute policy to all HTML elements and return the 230 // updated policy 231 func (abp *attrPolicyBuilder) Globally() *Policy { 232 233 for _, attr := range abp.attrNames { 234 if _, ok := abp.p.globalAttrs[attr]; !ok { 235 abp.p.globalAttrs[attr] = attrPolicy{} 236 } 237 238 ap := attrPolicy{} 239 if abp.regexp != nil { 240 ap.regexp = abp.regexp 241 } 242 243 abp.p.globalAttrs[attr] = ap 244 } 245 246 return abp.p 247 } 248 249 // AllowElements will append HTML elements to the whitelist without applying an 250 // attribute policy to those elements (the elements are permitted 251 // sans-attributes) 252 func (p *Policy) AllowElements(names ...string) *Policy { 253 p.init() 254 255 for _, element := range names { 256 element = strings.ToLower(element) 257 258 if _, ok := p.elsAndAttrs[element]; !ok { 259 p.elsAndAttrs[element] = make(map[string]attrPolicy) 260 } 261 } 262 263 return p 264 } 265 266 // RequireNoFollowOnLinks will result in all <a> tags having a rel="nofollow" 267 // added to them if one does not already exist 268 // 269 // Note: This requires p.RequireParseableURLs(true) and will enable it. 270 func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy { 271 272 p.requireNoFollow = require 273 p.requireParseableURLs = true 274 275 return p 276 } 277 278 // RequireNoFollowOnFullyQualifiedLinks will result in all <a> tags that point 279 // to a non-local destination (i.e. starts with a protocol and has a host) 280 // having a rel="nofollow" added to them if one does not already exist 281 // 282 // Note: This requires p.RequireParseableURLs(true) and will enable it. 283 func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy { 284 285 p.requireNoFollowFullyQualifiedLinks = require 286 p.requireParseableURLs = true 287 288 return p 289 } 290 291 // AddTargetBlankToFullyQualifiedLinks will result in all <a> tags that point 292 // to a non-local destination (i.e. starts with a protocol and has a host) 293 // having a target="_blank" added to them if one does not already exist 294 // 295 // Note: This requires p.RequireParseableURLs(true) and will enable it. 296 func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy { 297 298 p.addTargetBlankToFullyQualifiedLinks = require 299 p.requireParseableURLs = true 300 301 return p 302 } 303 304 // RequireParseableURLs will result in all URLs requiring that they be parseable 305 // by "net/url" url.Parse() 306 // This applies to: 307 // - a.href 308 // - area.href 309 // - blockquote.cite 310 // - img.src 311 // - link.href 312 // - script.src 313 func (p *Policy) RequireParseableURLs(require bool) *Policy { 314 315 p.requireParseableURLs = require 316 317 return p 318 } 319 320 // AllowRelativeURLs enables RequireParseableURLs and then permits URLs that 321 // are parseable, have no schema information and url.IsAbs() returns false 322 // This permits local URLs 323 func (p *Policy) AllowRelativeURLs(require bool) *Policy { 324 325 p.RequireParseableURLs(true) 326 p.allowRelativeURLs = require 327 328 return p 329 } 330 331 // AllowURLSchemes will append URL schemes to the whitelist 332 // Example: p.AllowURLSchemes("mailto", "http", "https") 333 func (p *Policy) AllowURLSchemes(schemes ...string) *Policy { 334 p.init() 335 336 p.RequireParseableURLs(true) 337 338 for _, scheme := range schemes { 339 scheme = strings.ToLower(scheme) 340 341 // Allow all URLs with matching scheme. 342 p.allowURLSchemes[scheme] = nil 343 } 344 345 return p 346 } 347 348 // AllowURLSchemeWithCustomPolicy will append URL schemes with 349 // a custom URL policy to the whitelist. 350 // Only the URLs with matching schema and urlPolicy(url) 351 // returning true will be allowed. 352 func (p *Policy) AllowURLSchemeWithCustomPolicy( 353 scheme string, 354 urlPolicy func(url *url.URL) (allowUrl bool), 355 ) *Policy { 356 357 p.init() 358 359 p.RequireParseableURLs(true) 360 361 scheme = strings.ToLower(scheme) 362 363 p.allowURLSchemes[scheme] = urlPolicy 364 365 return p 366 } 367 368 // AllowDocType states whether the HTML sanitised by the sanitizer is allowed to 369 // contain the HTML DocType tag: <!DOCTYPE HTML> or one of it's variants. 370 // 371 // The HTML spec only permits one doctype per document, and as you know how you 372 // are using the output of this, you know best as to whether we should ignore it 373 // (default) or not. 374 // 375 // If you are sanitizing a HTML fragment the default (false) is fine. 376 func (p *Policy) AllowDocType(allow bool) *Policy { 377 378 p.allowDocType = allow 379 380 return p 381 } 382 383 // SkipElementsContent adds the HTML elements whose tags is needed to be removed 384 // with it's content. 385 func (p *Policy) SkipElementsContent(names ...string) *Policy { 386 387 p.init() 388 389 for _, element := range names { 390 element = strings.ToLower(element) 391 392 if _, ok := p.setOfElementsToSkipContent[element]; !ok { 393 p.setOfElementsToSkipContent[element] = struct{}{} 394 } 395 } 396 397 return p 398 } 399 400 // addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid 401 // without any attributes to an internal map. 402 // i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr 403 // is mandatory 404 func (p *Policy) addDefaultElementsWithoutAttrs() { 405 p.init() 406 407 p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{} 408 p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{} 409 p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{} 410 p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{} 411 p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{} 412 p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{} 413 p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{} 414 p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{} 415 p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{} 416 p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{} 417 p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{} 418 p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{} 419 p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{} 420 p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{} 421 p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{} 422 p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{} 423 p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{} 424 p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{} 425 p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{} 426 p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{} 427 p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{} 428 p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{} 429 p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{} 430 p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{} 431 p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{} 432 p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{} 433 p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{} 434 p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{} 435 p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{} 436 p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{} 437 p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{} 438 p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{} 439 p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{} 440 p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{} 441 p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{} 442 p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{} 443 p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{} 444 p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{} 445 p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{} 446 p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{} 447 p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{} 448 p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{} 449 p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{} 450 p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{} 451 p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{} 452 p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{} 453 p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{} 454 p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{} 455 p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{} 456 p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{} 457 p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{} 458 p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{} 459 p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{} 460 p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{} 461 p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{} 462 p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{} 463 p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{} 464 p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{} 465 p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{} 466 p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{} 467 p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{} 468 p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{} 469 p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{} 470 p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{} 471 p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{} 472 p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{} 473 p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{} 474 p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{} 475 p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{} 476 p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{} 477 p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{} 478 p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{} 479 p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{} 480 p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{} 481 p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{} 482 p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{} 483 p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{} 484 p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{} 485 p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{} 486 p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{} 487 p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{} 488 p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{} 489 p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{} 490 p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{} 491 492 } 493 494 // addDefaultSkipElementContent adds the HTML elements that we should skip 495 // rendering the character content of, if the element itself is not allowed. 496 // This is all character data that the end user would not normally see. 497 // i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or 498 // anything else until we encounter the closing </script> tag. 499 func (p *Policy) addDefaultSkipElementContent() { 500 p.init() 501 502 p.setOfElementsToSkipContent["frame"] = struct{}{} 503 p.setOfElementsToSkipContent["frameset"] = struct{}{} 504 p.setOfElementsToSkipContent["iframe"] = struct{}{} 505 p.setOfElementsToSkipContent["noembed"] = struct{}{} 506 p.setOfElementsToSkipContent["noframes"] = struct{}{} 507 p.setOfElementsToSkipContent["noscript"] = struct{}{} 508 p.setOfElementsToSkipContent["nostyle"] = struct{}{} 509 p.setOfElementsToSkipContent["object"] = struct{}{} 510 p.setOfElementsToSkipContent["script"] = struct{}{} 511 p.setOfElementsToSkipContent["style"] = struct{}{} 512 p.setOfElementsToSkipContent["title"] = struct{}{} 513 }