code.gitea.io/gitea@v1.22.3/modules/markup/sanitizer_default.go (about) 1 // Copyright 2024 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package markup 5 6 import ( 7 "io" 8 "net/url" 9 "regexp" 10 11 "code.gitea.io/gitea/modules/setting" 12 13 "github.com/microcosm-cc/bluemonday" 14 ) 15 16 func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { 17 policy := bluemonday.UGCPolicy() 18 19 // For JS code copy and Mermaid loading state 20 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre") 21 22 // For code preview 23 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally() 24 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td") 25 policy.AllowAttrs("data-line-number").OnElements("span") 26 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td") 27 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div") 28 29 // For code preview (unicode escape) 30 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table") 31 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td") 32 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form 33 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span") 34 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span") 35 policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span") 36 37 // For color preview 38 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span") 39 40 // For attention 41 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote") 42 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong") 43 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg") 44 policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg") 45 policy.AllowAttrs("fill-rule", "d").OnElements("path") 46 47 // For Chroma markdown plugin 48 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code") 49 50 // Checkboxes 51 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") 52 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") 53 54 // Custom URL-Schemes 55 if len(setting.Markdown.CustomURLSchemes) > 0 { 56 policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) 57 } else { 58 policy.AllowURLSchemesMatching(st.allowAllRegex) 59 60 // Even if every scheme is allowed, these three are blocked for security reasons 61 disallowScheme := func(*url.URL) bool { 62 return false 63 } 64 policy.AllowURLSchemeWithCustomPolicy("javascript", disallowScheme) 65 policy.AllowURLSchemeWithCustomPolicy("vbscript", disallowScheme) 66 policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme) 67 } 68 69 // Allow classes for anchors 70 policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a") 71 72 // Allow classes for task lists 73 policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") 74 75 // Allow classes for org mode list item status. 76 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li") 77 78 // Allow icons 79 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") 80 81 // Allow classes for emojis 82 policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") 83 84 // Allow icons, emojis, chroma syntax and keyword markup on span 85 policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") 86 87 // Allow 'color' and 'background-color' properties for the style attribute on text elements. 88 policy.AllowStyles("color", "background-color").OnElements("span", "p") 89 90 // Allow generally safe attributes 91 generalSafeAttrs := []string{ 92 "abbr", "accept", "accept-charset", 93 "accesskey", "action", "align", "alt", 94 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", 95 "axis", "border", "cellpadding", "cellspacing", "char", 96 "charoff", "charset", "checked", 97 "clear", "cols", "colspan", "color", 98 "compact", "coords", "datetime", "dir", 99 "disabled", "enctype", "for", "frame", 100 "headers", "height", "hreflang", 101 "hspace", "ismap", "label", "lang", 102 "maxlength", "media", "method", 103 "multiple", "name", "nohref", "noshade", 104 "nowrap", "open", "prompt", "readonly", "rel", "rev", 105 "rows", "rowspan", "rules", "scope", 106 "selected", "shape", "size", "span", 107 "start", "summary", "tabindex", "target", 108 "title", "type", "usemap", "valign", "value", 109 "vspace", "width", "itemprop", 110 } 111 112 generalSafeElements := []string{ 113 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", 114 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label", 115 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", 116 "details", "caption", "figure", "figcaption", 117 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr", 118 } 119 120 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) 121 122 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video") 123 124 policy.AllowAttrs("itemscope", "itemtype").OnElements("div") 125 126 // FIXME: Need to handle longdesc in img but there is no easy way to do it 127 128 // Custom keyword markup 129 defaultSanitizer.addSanitizerRules(policy, setting.ExternalSanitizerRules) 130 131 return policy 132 } 133 134 // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. 135 func Sanitize(s string) string { 136 return GetDefaultSanitizer().defaultPolicy.Sanitize(s) 137 } 138 139 // SanitizeReader sanitizes a Reader 140 func SanitizeReader(r io.Reader, renderer string, w io.Writer) error { 141 policy, exist := GetDefaultSanitizer().rendererPolicies[renderer] 142 if !exist { 143 policy = GetDefaultSanitizer().defaultPolicy 144 } 145 return policy.SanitizeReaderToWriter(r, w) 146 }