code.gitea.io/gitea@v1.19.3/modules/markup/mdstripper/mdstripper.go (about) 1 // Copyright 2019 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package mdstripper 5 6 import ( 7 "bytes" 8 "io" 9 "net/url" 10 "strings" 11 "sync" 12 13 "code.gitea.io/gitea/modules/log" 14 "code.gitea.io/gitea/modules/markup/common" 15 "code.gitea.io/gitea/modules/setting" 16 17 "github.com/yuin/goldmark" 18 "github.com/yuin/goldmark/ast" 19 "github.com/yuin/goldmark/extension" 20 "github.com/yuin/goldmark/parser" 21 "github.com/yuin/goldmark/renderer" 22 "github.com/yuin/goldmark/renderer/html" 23 "github.com/yuin/goldmark/text" 24 ) 25 26 var ( 27 giteaHostInit sync.Once 28 giteaHost *url.URL 29 ) 30 31 type stripRenderer struct { 32 localhost *url.URL 33 links []string 34 empty bool 35 } 36 37 func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { 38 return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { 39 if !entering { 40 return ast.WalkContinue, nil 41 } 42 switch v := n.(type) { 43 case *ast.Text: 44 if !v.IsRaw() { 45 _, prevSibIsText := n.PreviousSibling().(*ast.Text) 46 coalesce := prevSibIsText 47 r.processString( 48 w, 49 v.Text(source), 50 coalesce) 51 if v.SoftLineBreak() { 52 r.doubleSpace(w) 53 } 54 } 55 return ast.WalkContinue, nil 56 case *ast.Link: 57 r.processLink(w, v.Destination) 58 return ast.WalkSkipChildren, nil 59 case *ast.AutoLink: 60 // This could be a reference to an issue or pull - if so convert it 61 r.processAutoLink(w, v.URL(source)) 62 return ast.WalkSkipChildren, nil 63 } 64 return ast.WalkContinue, nil 65 }) 66 } 67 68 func (r *stripRenderer) doubleSpace(w io.Writer) { 69 if !r.empty { 70 _, _ = w.Write([]byte{'\n'}) 71 } 72 } 73 74 func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { 75 // Always break-up words 76 if !coalesce { 77 r.doubleSpace(w) 78 } 79 _, _ = w.Write(text) 80 r.empty = false 81 } 82 83 // ProcessAutoLinks to detect and handle links to issues and pulls 84 func (r *stripRenderer) processAutoLink(w io.Writer, link []byte) { 85 linkStr := string(link) 86 u, err := url.Parse(linkStr) 87 if err != nil { 88 // Process out of band 89 r.links = append(r.links, linkStr) 90 return 91 } 92 93 // Note: we're not attempting to match the URL scheme (http/https) 94 host := strings.ToLower(u.Host) 95 if host != "" && host != strings.ToLower(r.localhost.Host) { 96 // Process out of band 97 r.links = append(r.links, linkStr) 98 return 99 } 100 101 // We want: /user/repo/issues/3 102 parts := strings.Split(strings.TrimPrefix(u.EscapedPath(), r.localhost.EscapedPath()), "/") 103 if len(parts) != 5 || parts[0] != "" { 104 // Process out of band 105 r.links = append(r.links, linkStr) 106 return 107 } 108 109 var sep string 110 if parts[3] == "issues" { 111 sep = "#" 112 } else if parts[3] == "pulls" { 113 sep = "!" 114 } else { 115 // Process out of band 116 r.links = append(r.links, linkStr) 117 return 118 } 119 120 _, _ = w.Write([]byte(parts[1])) 121 _, _ = w.Write([]byte("/")) 122 _, _ = w.Write([]byte(parts[2])) 123 _, _ = w.Write([]byte(sep)) 124 _, _ = w.Write([]byte(parts[4])) 125 } 126 127 func (r *stripRenderer) processLink(w io.Writer, link []byte) { 128 // Links are processed out of band 129 r.links = append(r.links, string(link)) 130 } 131 132 // GetLinks returns the list of link data collected while parsing 133 func (r *stripRenderer) GetLinks() []string { 134 return r.links 135 } 136 137 // AddOptions adds given option to this renderer. 138 func (r *stripRenderer) AddOptions(...renderer.Option) { 139 // no-op 140 } 141 142 // StripMarkdown parses markdown content by removing all markup and code blocks 143 // in order to extract links and other references 144 func StripMarkdown(rawBytes []byte) (string, []string) { 145 buf, links := StripMarkdownBytes(rawBytes) 146 return string(buf), links 147 } 148 149 var ( 150 stripParser parser.Parser 151 once = sync.Once{} 152 ) 153 154 // StripMarkdownBytes parses markdown content by removing all markup and code blocks 155 // in order to extract links and other references 156 func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { 157 once.Do(func() { 158 gdMarkdown := goldmark.New( 159 goldmark.WithExtensions(extension.Table, 160 extension.Strikethrough, 161 extension.TaskList, 162 extension.DefinitionList, 163 common.FootnoteExtension, 164 common.Linkify, 165 ), 166 goldmark.WithParserOptions( 167 parser.WithAttribute(), 168 parser.WithAutoHeadingID(), 169 ), 170 goldmark.WithRendererOptions( 171 html.WithUnsafe(), 172 ), 173 ) 174 stripParser = gdMarkdown.Parser() 175 }) 176 stripper := &stripRenderer{ 177 localhost: getGiteaHost(), 178 links: make([]string, 0, 10), 179 empty: true, 180 } 181 reader := text.NewReader(rawBytes) 182 doc := stripParser.Parse(reader) 183 var buf bytes.Buffer 184 if err := stripper.Render(&buf, rawBytes, doc); err != nil { 185 log.Error("Unable to strip: %v", err) 186 } 187 return buf.Bytes(), stripper.GetLinks() 188 } 189 190 // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information 191 func getGiteaHost() *url.URL { 192 giteaHostInit.Do(func() { 193 var err error 194 if giteaHost, err = url.Parse(setting.AppURL); err != nil { 195 giteaHost = &url.URL{} 196 } 197 }) 198 return giteaHost 199 }