github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/markup/sanitizer.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  // Copyright 2017 The GitBundle Authors. All rights reserved.
     7  // Copyright 2017 The Gogs Authors. All rights reserved.
     8  // Use of this source code is governed by a MIT-style
     9  // license that can be found in the LICENSE file.
    10  
    11  package markup
    12  
    13  import (
    14  	"io"
    15  	"regexp"
    16  	"sync"
    17  
    18  	"github.com/gitbundle/modules/setting"
    19  
    20  	"github.com/microcosm-cc/bluemonday"
    21  )
    22  
// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
// any modification to the underlying policies once it's been created.
//
// All fields are unexported: defaultPolicy is the fallback policy,
// rendererPolicies holds the policies registered under a renderer name, and
// init is the sync.Once that NewSanitizer uses to run the initialization a
// single time. Note that InitializeSanitizer replaces both policy fields
// wholesale each time it is called.
type Sanitizer struct {
	defaultPolicy    *bluemonday.Policy
	rendererPolicies map[string]*bluemonday.Policy
	init             sync.Once
}
    30  
// sanitizer is the package-wide instance used by Sanitize and SanitizeReader.
// It starts out with no policies; they are filled in by InitializeSanitizer.
var sanitizer = &Sanitizer{}
    32  
    33  // NewSanitizer initializes sanitizer with allowed attributes based on settings.
    34  // Multiple calls to this function will only create one instance of Sanitizer during
    35  // entire application lifecycle.
    36  func NewSanitizer() {
    37  	sanitizer.init.Do(func() {
    38  		InitializeSanitizer()
    39  	})
    40  }
    41  
    42  // InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings
    43  func InitializeSanitizer() {
    44  	sanitizer.rendererPolicies = map[string]*bluemonday.Policy{}
    45  	sanitizer.defaultPolicy = createDefaultPolicy()
    46  
    47  	for name, renderer := range renderers {
    48  		sanitizerRules := renderer.SanitizerRules()
    49  		if len(sanitizerRules) > 0 {
    50  			policy := createDefaultPolicy()
    51  			addSanitizerRules(policy, sanitizerRules)
    52  			sanitizer.rendererPolicies[name] = policy
    53  		}
    54  	}
    55  }
    56  
    57  func createDefaultPolicy() *bluemonday.Policy {
    58  	policy := bluemonday.UGCPolicy()
    59  
    60  	// For JS code copy and Mermaid loading state
    61  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre")
    62  
    63  	// For Chroma markdown plugin
    64  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
    65  
    66  	// Checkboxes
    67  	policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
    68  	policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
    69  
    70  	// Custom URL-Schemes
    71  	if len(setting.Markdown.CustomURLSchemes) > 0 {
    72  		policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
    73  	}
    74  
    75  	// Allow classes for anchors
    76  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a")
    77  
    78  	// Allow classes for task lists
    79  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
    80  
    81  	// Allow icons
    82  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
    83  
    84  	// Allow unlabelled labels
    85  	policy.AllowNoAttrs().OnElements("label")
    86  
    87  	// Allow classes for emojis
    88  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
    89  
    90  	// Allow icons, emojis, chroma syntax and keyword markup on span
    91  	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
    92  
    93  	// Allow generally safe attributes
    94  	generalSafeAttrs := []string{
    95  		"abbr", "accept", "accept-charset",
    96  		"accesskey", "action", "align", "alt",
    97  		"aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
    98  		"axis", "border", "cellpadding", "cellspacing", "char",
    99  		"charoff", "charset", "checked",
   100  		"clear", "cols", "colspan", "color",
   101  		"compact", "coords", "datetime", "dir",
   102  		"disabled", "enctype", "for", "frame",
   103  		"headers", "height", "hreflang",
   104  		"hspace", "ismap", "label", "lang",
   105  		"maxlength", "media", "method",
   106  		"multiple", "name", "nohref", "noshade",
   107  		"nowrap", "open", "prompt", "readonly", "rel", "rev",
   108  		"rows", "rowspan", "rules", "scope",
   109  		"selected", "shape", "size", "span",
   110  		"start", "summary", "tabindex", "target",
   111  		"title", "type", "usemap", "valign", "value",
   112  		"vspace", "width", "itemprop",
   113  	}
   114  
   115  	generalSafeElements := []string{
   116  		"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
   117  		"div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote",
   118  		"dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
   119  		"details", "caption", "figure", "figcaption",
   120  		"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr",
   121  	}
   122  
   123  	policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
   124  
   125  	policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
   126  
   127  	// FIXME: Need to handle longdesc in img but there is no easy way to do it
   128  
   129  	// Custom keyword markup
   130  	addSanitizerRules(policy, setting.ExternalSanitizerRules)
   131  
   132  	return policy
   133  }
   134  
   135  func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) {
   136  	for _, rule := range rules {
   137  		if rule.AllowDataURIImages {
   138  			policy.AllowDataURIImages()
   139  		}
   140  		if rule.Element != "" {
   141  			if rule.Regexp != nil {
   142  				policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
   143  			} else {
   144  				policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
   145  			}
   146  		}
   147  	}
   148  }
   149  
   150  // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
   151  func Sanitize(s string) string {
   152  	NewSanitizer()
   153  	return sanitizer.defaultPolicy.Sanitize(s)
   154  }
   155  
   156  // SanitizeReader sanitizes a Reader
   157  func SanitizeReader(r io.Reader, renderer string, w io.Writer) error {
   158  	NewSanitizer()
   159  	policy, exist := sanitizer.rendererPolicies[renderer]
   160  	if !exist {
   161  		policy = sanitizer.defaultPolicy
   162  	}
   163  	return policy.SanitizeReaderToWriter(r, w)
   164  }