code.gitea.io/gitea@v1.19.3/modules/markup/sanitizer.go (about) 1 // Copyright 2017 The Gitea Authors. All rights reserved. 2 // Copyright 2017 The Gogs Authors. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 5 package markup 6 7 import ( 8 "io" 9 "regexp" 10 "sync" 11 12 "code.gitea.io/gitea/modules/setting" 13 14 "github.com/microcosm-cc/bluemonday" 15 ) 16 17 // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow 18 // any modification to the underlying policies once it's been created. 19 type Sanitizer struct { 20 defaultPolicy *bluemonday.Policy 21 rendererPolicies map[string]*bluemonday.Policy 22 init sync.Once 23 } 24 25 var sanitizer = &Sanitizer{} 26 27 // NewSanitizer initializes sanitizer with allowed attributes based on settings. 28 // Multiple calls to this function will only create one instance of Sanitizer during 29 // entire application lifecycle. 30 func NewSanitizer() { 31 sanitizer.init.Do(func() { 32 InitializeSanitizer() 33 }) 34 } 35 36 // InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings 37 func InitializeSanitizer() { 38 sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} 39 sanitizer.defaultPolicy = createDefaultPolicy() 40 41 for name, renderer := range renderers { 42 sanitizerRules := renderer.SanitizerRules() 43 if len(sanitizerRules) > 0 { 44 policy := createDefaultPolicy() 45 addSanitizerRules(policy, sanitizerRules) 46 sanitizer.rendererPolicies[name] = policy 47 } 48 } 49 } 50 51 func createDefaultPolicy() *bluemonday.Policy { 52 policy := bluemonday.UGCPolicy() 53 54 // For JS code copy and Mermaid loading state 55 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre") 56 57 // For color preview 58 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span") 59 60 // For attention 61 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong") 62 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+$`)).OnElements("span", "strong") 63 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^svg octicon-\w+$`)).OnElements("svg") 64 policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg") 65 policy.AllowAttrs("fill-rule", "d").OnElements("path") 66 67 // For Chroma markdown plugin 68 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code") 69 70 // Checkboxes 71 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") 72 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") 73 74 // Custom URL-Schemes 75 if len(setting.Markdown.CustomURLSchemes) > 0 { 76 policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) 77 } 78 79 // Allow classes for anchors 80 policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a") 81 82 // Allow classes for task lists 83 policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") 84 85 // Allow icons 86 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") 87 88 // Allow unlabelled labels 89 policy.AllowNoAttrs().OnElements("label") 90 91 // Allow classes for emojis 92 policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") 93 94 // Allow icons, emojis, chroma syntax and keyword markup on span 95 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") 96 97 // Allow 'style' attribute on text elements. 98 policy.AllowAttrs("style").OnElements("span", "p") 99 100 // Allow 'color' and 'background-color' properties for the style attribute on text elements. 101 policy.AllowStyles("color", "background-color").OnElements("span", "p") 102 103 // Allow generally safe attributes 104 generalSafeAttrs := []string{ 105 "abbr", "accept", "accept-charset", 106 "accesskey", "action", "align", "alt", 107 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", 108 "axis", "border", "cellpadding", "cellspacing", "char", 109 "charoff", "charset", "checked", 110 "clear", "cols", "colspan", "color", 111 "compact", "coords", "datetime", "dir", 112 "disabled", "enctype", "for", "frame", 113 "headers", "height", "hreflang", 114 "hspace", "ismap", "label", "lang", 115 "maxlength", "media", "method", 116 "multiple", "name", "nohref", "noshade", 117 "nowrap", "open", "prompt", "readonly", "rel", "rev", 118 "rows", "rowspan", "rules", "scope", 119 "selected", "shape", "size", "span", 120 "start", "summary", "tabindex", "target", 121 "title", "type", "usemap", "valign", "value", 122 "vspace", "width", "itemprop", 123 } 124 125 generalSafeElements := []string{ 126 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", 127 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", 128 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", 129 "details", "caption", "figure", "figcaption", 130 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr", 131 } 132 133 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) 134 135 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video") 136 137 policy.AllowAttrs("itemscope", "itemtype").OnElements("div") 138 139 // FIXME: Need to handle longdesc in img but there is no easy way to do it 140 141 // Custom keyword markup 142 addSanitizerRules(policy, setting.ExternalSanitizerRules) 143 144 return policy 145 } 146 147 func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { 148 for _, rule := range rules { 149 if rule.AllowDataURIImages { 150 policy.AllowDataURIImages() 151 } 152 if rule.Element != "" { 153 if rule.Regexp != nil { 154 policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) 155 } else { 156 policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) 157 } 158 } 159 } 160 } 161 162 // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. 163 func Sanitize(s string) string { 164 NewSanitizer() 165 return sanitizer.defaultPolicy.Sanitize(s) 166 } 167 168 // SanitizeReader sanitizes a Reader 169 func SanitizeReader(r io.Reader, renderer string, w io.Writer) error { 170 NewSanitizer() 171 policy, exist := sanitizer.rendererPolicies[renderer] 172 if !exist { 173 policy = sanitizer.defaultPolicy 174 } 175 return policy.SanitizeReaderToWriter(r, w) 176 }