code.gitea.io/gitea@v1.22.3/modules/markup/sanitizer_default.go (about)

     1  // Copyright 2024 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package markup
     5  
     6  import (
     7  	"io"
     8  	"net/url"
     9  	"regexp"
    10  
    11  	"code.gitea.io/gitea/modules/setting"
    12  
    13  	"github.com/microcosm-cc/bluemonday"
    14  )
    15  
    16  func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
    17  	policy := bluemonday.UGCPolicy()
    18  
    19  	// For JS code copy and Mermaid loading state
    20  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre")
    21  
    22  	// For code preview
    23  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally()
    24  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td")
    25  	policy.AllowAttrs("data-line-number").OnElements("span")
    26  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td")
    27  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div")
    28  
    29  	// For code preview (unicode escape)
    30  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table")
    31  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td")
    32  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form
    33  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span")
    34  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span")
    35  	policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span")
    36  
    37  	// For color preview
    38  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span")
    39  
    40  	// For attention
    41  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote")
    42  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong")
    43  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg")
    44  	policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg")
    45  	policy.AllowAttrs("fill-rule", "d").OnElements("path")
    46  
    47  	// For Chroma markdown plugin
    48  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code")
    49  
    50  	// Checkboxes
    51  	policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
    52  	policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
    53  
    54  	// Custom URL-Schemes
    55  	if len(setting.Markdown.CustomURLSchemes) > 0 {
    56  		policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
    57  	} else {
    58  		policy.AllowURLSchemesMatching(st.allowAllRegex)
    59  
    60  		// Even if every scheme is allowed, these three are blocked for security reasons
    61  		disallowScheme := func(*url.URL) bool {
    62  			return false
    63  		}
    64  		policy.AllowURLSchemeWithCustomPolicy("javascript", disallowScheme)
    65  		policy.AllowURLSchemeWithCustomPolicy("vbscript", disallowScheme)
    66  		policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme)
    67  	}
    68  
    69  	// Allow classes for anchors
    70  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a")
    71  
    72  	// Allow classes for task lists
    73  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
    74  
    75  	// Allow classes for org mode list item status.
    76  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li")
    77  
    78  	// Allow icons
    79  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
    80  
    81  	// Allow classes for emojis
    82  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
    83  
    84  	// Allow icons, emojis, chroma syntax and keyword markup on span
    85  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
    86  
    87  	// Allow 'color' and 'background-color' properties for the style attribute on text elements.
    88  	policy.AllowStyles("color", "background-color").OnElements("span", "p")
    89  
    90  	// Allow generally safe attributes
    91  	generalSafeAttrs := []string{
    92  		"abbr", "accept", "accept-charset",
    93  		"accesskey", "action", "align", "alt",
    94  		"aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
    95  		"axis", "border", "cellpadding", "cellspacing", "char",
    96  		"charoff", "charset", "checked",
    97  		"clear", "cols", "colspan", "color",
    98  		"compact", "coords", "datetime", "dir",
    99  		"disabled", "enctype", "for", "frame",
   100  		"headers", "height", "hreflang",
   101  		"hspace", "ismap", "label", "lang",
   102  		"maxlength", "media", "method",
   103  		"multiple", "name", "nohref", "noshade",
   104  		"nowrap", "open", "prompt", "readonly", "rel", "rev",
   105  		"rows", "rowspan", "rules", "scope",
   106  		"selected", "shape", "size", "span",
   107  		"start", "summary", "tabindex", "target",
   108  		"title", "type", "usemap", "valign", "value",
   109  		"vspace", "width", "itemprop",
   110  	}
   111  
   112  	generalSafeElements := []string{
   113  		"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
   114  		"div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
   115  		"dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
   116  		"details", "caption", "figure", "figcaption",
   117  		"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
   118  	}
   119  
   120  	policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
   121  
   122  	policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
   123  
   124  	policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
   125  
   126  	// FIXME: Need to handle longdesc in img but there is no easy way to do it
   127  
   128  	// Custom keyword markup
   129  	defaultSanitizer.addSanitizerRules(policy, setting.ExternalSanitizerRules)
   130  
   131  	return policy
   132  }
   133  
   134  // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
   135  func Sanitize(s string) string {
   136  	return GetDefaultSanitizer().defaultPolicy.Sanitize(s)
   137  }
   138  
   139  // SanitizeReader sanitizes a Reader
   140  func SanitizeReader(r io.Reader, renderer string, w io.Writer) error {
   141  	policy, exist := GetDefaultSanitizer().rendererPolicies[renderer]
   142  	if !exist {
   143  		policy = GetDefaultSanitizer().defaultPolicy
   144  	}
   145  	return policy.SanitizeReaderToWriter(r, w)
   146  }