code.gitea.io/gitea@v1.19.3/modules/markup/sanitizer.go (about)

     1  // Copyright 2017 The Gitea Authors. All rights reserved.
     2  // Copyright 2017 The Gogs Authors. All rights reserved.
     3  // SPDX-License-Identifier: MIT
     4  
     5  package markup
     6  
     7  import (
     8  	"io"
     9  	"regexp"
    10  	"sync"
    11  
    12  	"code.gitea.io/gitea/modules/setting"
    13  
    14  	"github.com/microcosm-cc/bluemonday"
    15  )
    16  
    17  // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
    18  // any modification to the underlying policies once it's been created.
    19  type Sanitizer struct {
    20  	defaultPolicy    *bluemonday.Policy
    21  	rendererPolicies map[string]*bluemonday.Policy
    22  	init             sync.Once
    23  }
    24  
    25  var sanitizer = &Sanitizer{}
    26  
    27  // NewSanitizer initializes sanitizer with allowed attributes based on settings.
    28  // Multiple calls to this function will only create one instance of Sanitizer during
    29  // entire application lifecycle.
    30  func NewSanitizer() {
    31  	sanitizer.init.Do(func() {
    32  		InitializeSanitizer()
    33  	})
    34  }
    35  
    36  // InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings
    37  func InitializeSanitizer() {
    38  	sanitizer.rendererPolicies = map[string]*bluemonday.Policy{}
    39  	sanitizer.defaultPolicy = createDefaultPolicy()
    40  
    41  	for name, renderer := range renderers {
    42  		sanitizerRules := renderer.SanitizerRules()
    43  		if len(sanitizerRules) > 0 {
    44  			policy := createDefaultPolicy()
    45  			addSanitizerRules(policy, sanitizerRules)
    46  			sanitizer.rendererPolicies[name] = policy
    47  		}
    48  	}
    49  }
    50  
    51  func createDefaultPolicy() *bluemonday.Policy {
    52  	policy := bluemonday.UGCPolicy()
    53  
    54  	// For JS code copy and Mermaid loading state
    55  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre")
    56  
    57  	// For color preview
    58  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span")
    59  
    60  	// For attention
    61  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong")
    62  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+$`)).OnElements("span", "strong")
    63  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^svg octicon-\w+$`)).OnElements("svg")
    64  	policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg")
    65  	policy.AllowAttrs("fill-rule", "d").OnElements("path")
    66  
    67  	// For Chroma markdown plugin
    68  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code")
    69  
    70  	// Checkboxes
    71  	policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
    72  	policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
    73  
    74  	// Custom URL-Schemes
    75  	if len(setting.Markdown.CustomURLSchemes) > 0 {
    76  		policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
    77  	}
    78  
    79  	// Allow classes for anchors
    80  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a")
    81  
    82  	// Allow classes for task lists
    83  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
    84  
    85  	// Allow icons
    86  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
    87  
    88  	// Allow unlabelled labels
    89  	policy.AllowNoAttrs().OnElements("label")
    90  
    91  	// Allow classes for emojis
    92  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
    93  
    94  	// Allow icons, emojis, chroma syntax and keyword markup on span
    95  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
    96  
    97  	// Allow 'style' attribute on text elements.
    98  	policy.AllowAttrs("style").OnElements("span", "p")
    99  
   100  	// Allow 'color' and 'background-color' properties for the style attribute on text elements.
   101  	policy.AllowStyles("color", "background-color").OnElements("span", "p")
   102  
   103  	// Allow generally safe attributes
   104  	generalSafeAttrs := []string{
   105  		"abbr", "accept", "accept-charset",
   106  		"accesskey", "action", "align", "alt",
   107  		"aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
   108  		"axis", "border", "cellpadding", "cellspacing", "char",
   109  		"charoff", "charset", "checked",
   110  		"clear", "cols", "colspan", "color",
   111  		"compact", "coords", "datetime", "dir",
   112  		"disabled", "enctype", "for", "frame",
   113  		"headers", "height", "hreflang",
   114  		"hspace", "ismap", "label", "lang",
   115  		"maxlength", "media", "method",
   116  		"multiple", "name", "nohref", "noshade",
   117  		"nowrap", "open", "prompt", "readonly", "rel", "rev",
   118  		"rows", "rowspan", "rules", "scope",
   119  		"selected", "shape", "size", "span",
   120  		"start", "summary", "tabindex", "target",
   121  		"title", "type", "usemap", "valign", "value",
   122  		"vspace", "width", "itemprop",
   123  	}
   124  
   125  	generalSafeElements := []string{
   126  		"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
   127  		"div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote",
   128  		"dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
   129  		"details", "caption", "figure", "figcaption",
   130  		"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr",
   131  	}
   132  
   133  	policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
   134  
   135  	policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
   136  
   137  	policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
   138  
   139  	// FIXME: Need to handle longdesc in img but there is no easy way to do it
   140  
   141  	// Custom keyword markup
   142  	addSanitizerRules(policy, setting.ExternalSanitizerRules)
   143  
   144  	return policy
   145  }
   146  
   147  func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) {
   148  	for _, rule := range rules {
   149  		if rule.AllowDataURIImages {
   150  			policy.AllowDataURIImages()
   151  		}
   152  		if rule.Element != "" {
   153  			if rule.Regexp != nil {
   154  				policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
   155  			} else {
   156  				policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
   157  			}
   158  		}
   159  	}
   160  }
   161  
   162  // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
   163  func Sanitize(s string) string {
   164  	NewSanitizer()
   165  	return sanitizer.defaultPolicy.Sanitize(s)
   166  }
   167  
   168  // SanitizeReader sanitizes a Reader
   169  func SanitizeReader(r io.Reader, renderer string, w io.Writer) error {
   170  	NewSanitizer()
   171  	policy, exist := sanitizer.rendererPolicies[renderer]
   172  	if !exist {
   173  		policy = sanitizer.defaultPolicy
   174  	}
   175  	return policy.SanitizeReaderToWriter(r, w)
   176  }