github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/markup/sanitizer.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 // Copyright 2017 The GitBundle Authors. All rights reserved. 7 // Copyright 2017 The Gogs Authors. All rights reserved. 8 // Use of this source code is governed by a MIT-style 9 // license that can be found in the LICENSE file. 10 11 package markup 12 13 import ( 14 "io" 15 "regexp" 16 "sync" 17 18 "github.com/gitbundle/modules/setting" 19 20 "github.com/microcosm-cc/bluemonday" 21 ) 22 23 // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow 24 // any modification to the underlying policies once it's been created. 25 type Sanitizer struct { 26 defaultPolicy *bluemonday.Policy 27 rendererPolicies map[string]*bluemonday.Policy 28 init sync.Once 29 } 30 31 var sanitizer = &Sanitizer{} 32 33 // NewSanitizer initializes sanitizer with allowed attributes based on settings. 34 // Multiple calls to this function will only create one instance of Sanitizer during 35 // entire application lifecycle. 36 func NewSanitizer() { 37 sanitizer.init.Do(func() { 38 InitializeSanitizer() 39 }) 40 } 41 42 // InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings 43 func InitializeSanitizer() { 44 sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} 45 sanitizer.defaultPolicy = createDefaultPolicy() 46 47 for name, renderer := range renderers { 48 sanitizerRules := renderer.SanitizerRules() 49 if len(sanitizerRules) > 0 { 50 policy := createDefaultPolicy() 51 addSanitizerRules(policy, sanitizerRules) 52 sanitizer.rendererPolicies[name] = policy 53 } 54 } 55 } 56 57 func createDefaultPolicy() *bluemonday.Policy { 58 policy := bluemonday.UGCPolicy() 59 60 // For JS code copy and Mermaid loading state 61 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre") 62 63 // For Chroma markdown plugin 64 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") 65 66 // Checkboxes 67 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") 68 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") 69 70 // Custom URL-Schemes 71 if len(setting.Markdown.CustomURLSchemes) > 0 { 72 policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) 73 } 74 75 // Allow classes for anchors 76 policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a") 77 78 // Allow classes for task lists 79 policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") 80 81 // Allow icons 82 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") 83 84 // Allow unlabelled labels 85 policy.AllowNoAttrs().OnElements("label") 86 87 // Allow classes for emojis 88 policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") 89 90 // Allow icons, emojis, chroma syntax and keyword markup on span 91 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") 92 93 // Allow generally safe attributes 94 generalSafeAttrs := []string{ 95 "abbr", "accept", "accept-charset", 96 "accesskey", "action", "align", "alt", 97 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", 98 "axis", "border", "cellpadding", "cellspacing", "char", 99 "charoff", "charset", "checked", 100 "clear", "cols", "colspan", "color", 101 "compact", "coords", "datetime", "dir", 102 "disabled", "enctype", "for", "frame", 103 "headers", "height", "hreflang", 104 "hspace", "ismap", "label", "lang", 105 "maxlength", "media", "method", 106 "multiple", "name", "nohref", "noshade", 107 "nowrap", "open", "prompt", "readonly", "rel", "rev", 108 "rows", "rowspan", "rules", "scope", 109 "selected", "shape", "size", "span", 110 "start", "summary", "tabindex", "target", 111 "title", "type", "usemap", "valign", "value", 112 "vspace", "width", "itemprop", 113 } 114 115 generalSafeElements := []string{ 116 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", 117 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", 118 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", 119 "details", "caption", "figure", "figcaption", 120 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr", 121 } 122 123 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) 124 125 policy.AllowAttrs("itemscope", "itemtype").OnElements("div") 126 127 // FIXME: Need to handle longdesc in img but there is no easy way to do it 128 129 // Custom keyword markup 130 addSanitizerRules(policy, setting.ExternalSanitizerRules) 131 132 return policy 133 } 134 135 func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { 136 for _, rule := range rules { 137 if rule.AllowDataURIImages { 138 policy.AllowDataURIImages() 139 } 140 if rule.Element != "" { 141 if rule.Regexp != nil { 142 policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) 143 } else { 144 policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) 145 } 146 } 147 } 148 } 149 150 // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. 151 func Sanitize(s string) string { 152 NewSanitizer() 153 return sanitizer.defaultPolicy.Sanitize(s) 154 } 155 156 // SanitizeReader sanitizes a Reader 157 func SanitizeReader(r io.Reader, renderer string, w io.Writer) error { 158 NewSanitizer() 159 policy, exist := sanitizer.rendererPolicies[renderer] 160 if !exist { 161 policy = sanitizer.defaultPolicy 162 } 163 return policy.SanitizeReaderToWriter(r, w) 164 }