github.com/jhump/golang-x-tools@v0.0.0-20220218190644-4958d6d39439/internal/lsp/source/comment.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package source 6 7 import ( 8 "bytes" 9 "io" 10 "regexp" 11 "strings" 12 "unicode" 13 "unicode/utf8" 14 ) 15 16 // CommentToMarkdown converts comment text to formatted markdown. 17 // The comment was prepared by DocReader, 18 // so it is known not to have leading, trailing blank lines 19 // nor to have trailing spaces at the end of lines. 20 // The comment markers have already been removed. 21 // 22 // Each line is converted into a markdown line and empty lines are just converted to 23 // newlines. Heading are prefixed with `### ` to make it a markdown heading. 24 // 25 // A span of indented lines retains a 4 space prefix block, with the common indent 26 // prefix removed unless empty, in which case it will be converted to a newline. 27 // 28 // URLs in the comment text are converted into links. 29 func CommentToMarkdown(text string) string { 30 buf := &bytes.Buffer{} 31 commentToMarkdown(buf, text) 32 return buf.String() 33 } 34 35 var ( 36 mdNewline = []byte("\n") 37 mdHeader = []byte("### ") 38 mdIndent = []byte(" ") 39 mdLinkStart = []byte("[") 40 mdLinkDiv = []byte("](") 41 mdLinkEnd = []byte(")") 42 ) 43 44 func commentToMarkdown(w io.Writer, text string) { 45 blocks := blocks(text) 46 for i, b := range blocks { 47 switch b.op { 48 case opPara: 49 for _, line := range b.lines { 50 emphasize(w, line, true) 51 } 52 case opHead: 53 // The header block can consist of only one line. 54 // However, check the number of lines, just in case. 55 if len(b.lines) == 0 { 56 // Skip this block. 57 continue 58 } 59 header := b.lines[0] 60 61 w.Write(mdHeader) 62 commentEscape(w, header, true) 63 // Header doesn't end with \n unlike the lines of other blocks. 64 w.Write(mdNewline) 65 case opPre: 66 for _, line := range b.lines { 67 if isBlank(line) { 68 w.Write(mdNewline) 69 continue 70 } 71 w.Write(mdIndent) 72 w.Write([]byte(line)) 73 } 74 } 75 76 if i < len(blocks)-1 { 77 w.Write(mdNewline) 78 } 79 } 80 } 81 82 const ( 83 ulquo = "“" 84 urquo = "”" 85 ) 86 87 var ( 88 markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`) 89 90 unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo) 91 ) 92 93 // commentEscape escapes comment text for markdown. If nice is set, 94 // also turn `` into “; and '' into ”;. 95 func commentEscape(w io.Writer, text string, nice bool) { 96 if nice { 97 text = convertQuotes(text) 98 } 99 text = escapeRegex(text) 100 w.Write([]byte(text)) 101 } 102 103 func convertQuotes(text string) string { 104 return unicodeQuoteReplacer.Replace(text) 105 } 106 107 func escapeRegex(text string) string { 108 return markdownEscape.ReplaceAllString(text, `\$1`) 109 } 110 111 func emphasize(w io.Writer, line string, nice bool) { 112 for { 113 m := matchRx.FindStringSubmatchIndex(line) 114 if m == nil { 115 break 116 } 117 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) 118 119 // write text before match 120 commentEscape(w, line[0:m[0]], nice) 121 122 // adjust match for URLs 123 match := line[m[0]:m[1]] 124 if strings.Contains(match, "://") { 125 m0, m1 := m[0], m[1] 126 for _, s := range []string{"()", "{}", "[]"} { 127 open, close := s[:1], s[1:] // E.g., "(" and ")" 128 // require opening parentheses before closing parentheses (#22285) 129 if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) { 130 m1 = m0 + i 131 match = line[m0:m1] 132 } 133 // require balanced pairs of parentheses (#5043) 134 for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ { 135 m1 = strings.LastIndexAny(line[:m1], s) 136 match = line[m0:m1] 137 } 138 } 139 if m1 != m[1] { 140 // redo matching with shortened line for correct indices 141 m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)]) 142 } 143 } 144 145 // Following code has been modified from go/doc since words is always 146 // nil. All html formatting has also been transformed into markdown formatting 147 148 // analyze match 149 url := "" 150 if m[2] >= 0 { 151 url = match 152 } 153 154 // write match 155 if len(url) > 0 { 156 w.Write(mdLinkStart) 157 } 158 159 commentEscape(w, match, nice) 160 161 if len(url) > 0 { 162 w.Write(mdLinkDiv) 163 w.Write([]byte(urlReplacer.Replace(url))) 164 w.Write(mdLinkEnd) 165 } 166 167 // advance 168 line = line[m[1]:] 169 } 170 commentEscape(w, line, nice) 171 } 172 173 // Everything from here on is a copy of go/doc/comment.go 174 175 const ( 176 // Regexp for Go identifiers 177 identRx = `[\pL_][\pL_0-9]*` 178 179 // Regexp for URLs 180 // Match parens, and check later for balance - see #5043, #22285 181 // Match .,:;?! within path, but not at end - see #18139, #16565 182 // This excludes some rare yet valid urls ending in common punctuation 183 // in order to allow sentences ending in URLs. 184 185 // protocol (required) e.g. http 186 protoPart = `(https?|ftp|file|gopher|mailto|nntp)` 187 // host (required) e.g. www.example.com or [::1]:8080 188 hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)` 189 // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar 190 pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*` 191 192 urlRx = protoPart + `://` + hostPart + pathPart 193 ) 194 195 var ( 196 matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) 197 urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`) 198 ) 199 200 func indentLen(s string) int { 201 i := 0 202 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 203 i++ 204 } 205 return i 206 } 207 208 func isBlank(s string) bool { 209 return len(s) == 0 || (len(s) == 1 && s[0] == '\n') 210 } 211 212 func commonPrefix(a, b string) string { 213 i := 0 214 for i < len(a) && i < len(b) && a[i] == b[i] { 215 i++ 216 } 217 return a[0:i] 218 } 219 220 func unindent(block []string) { 221 if len(block) == 0 { 222 return 223 } 224 225 // compute maximum common white prefix 226 prefix := block[0][0:indentLen(block[0])] 227 for _, line := range block { 228 if !isBlank(line) { 229 prefix = commonPrefix(prefix, line[0:indentLen(line)]) 230 } 231 } 232 n := len(prefix) 233 234 // remove 235 for i, line := range block { 236 if !isBlank(line) { 237 block[i] = line[n:] 238 } 239 } 240 } 241 242 // heading returns the trimmed line if it passes as a section heading; 243 // otherwise it returns the empty string. 244 func heading(line string) string { 245 line = strings.TrimSpace(line) 246 if len(line) == 0 { 247 return "" 248 } 249 250 // a heading must start with an uppercase letter 251 r, _ := utf8.DecodeRuneInString(line) 252 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 253 return "" 254 } 255 256 // it must end in a letter or digit: 257 r, _ = utf8.DecodeLastRuneInString(line) 258 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 259 return "" 260 } 261 262 // exclude lines with illegal characters. we allow "()," 263 if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { 264 return "" 265 } 266 267 // allow "'" for possessive "'s" only 268 for b := line; ; { 269 i := strings.IndexRune(b, '\'') 270 if i < 0 { 271 break 272 } 273 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { 274 return "" // not followed by "s " 275 } 276 b = b[i+2:] 277 } 278 279 // allow "." when followed by non-space 280 for b := line; ; { 281 i := strings.IndexRune(b, '.') 282 if i < 0 { 283 break 284 } 285 if i+1 >= len(b) || b[i+1] == ' ' { 286 return "" // not followed by non-space 287 } 288 b = b[i+1:] 289 } 290 291 return line 292 } 293 294 type op int 295 296 const ( 297 opPara op = iota 298 opHead 299 opPre 300 ) 301 302 type block struct { 303 op op 304 lines []string 305 } 306 307 func blocks(text string) []block { 308 var ( 309 out []block 310 para []string 311 312 lastWasBlank = false 313 lastWasHeading = false 314 ) 315 316 close := func() { 317 if para != nil { 318 out = append(out, block{opPara, para}) 319 para = nil 320 } 321 } 322 323 lines := strings.SplitAfter(text, "\n") 324 unindent(lines) 325 for i := 0; i < len(lines); { 326 line := lines[i] 327 if isBlank(line) { 328 // close paragraph 329 close() 330 i++ 331 lastWasBlank = true 332 continue 333 } 334 if indentLen(line) > 0 { 335 // close paragraph 336 close() 337 338 // count indented or blank lines 339 j := i + 1 340 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { 341 j++ 342 } 343 // but not trailing blank lines 344 for j > i && isBlank(lines[j-1]) { 345 j-- 346 } 347 pre := lines[i:j] 348 i = j 349 350 unindent(pre) 351 352 // put those lines in a pre block 353 out = append(out, block{opPre, pre}) 354 lastWasHeading = false 355 continue 356 } 357 358 if lastWasBlank && !lastWasHeading && i+2 < len(lines) && 359 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { 360 // current line is non-blank, surrounded by blank lines 361 // and the next non-blank line is not indented: this 362 // might be a heading. 363 if head := heading(line); head != "" { 364 close() 365 out = append(out, block{opHead, []string{head}}) 366 i += 2 367 lastWasHeading = true 368 continue 369 } 370 } 371 372 // open paragraph 373 lastWasBlank = false 374 lastWasHeading = false 375 para = append(para, lines[i]) 376 i++ 377 } 378 close() 379 380 return out 381 }