// Source: github.com/v2fly/tools@v0.100.0/internal/lsp/source/comment.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package source

import (
	"bytes"
	"io"
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"
)

// CommentToMarkdown converts comment text to formatted markdown.
// The comment was prepared by DocReader,
// so it is known not to have leading, trailing blank lines
// nor to have trailing spaces at the end of lines.
// The comment markers have already been removed.
//
// Each line is converted into a markdown line and empty lines are just converted to
// newlines. Headings are prefixed with `### ` to make them markdown headings.
//
// A span of indented lines retains a 4 space prefix block, with the common indent
// prefix removed unless empty, in which case it will be converted to a newline.
//
// URLs in the comment text are converted into links.
func CommentToMarkdown(text string) string {
	var buf bytes.Buffer
	commentToMarkdown(&buf, text)
	return buf.String()
}

// Markdown fragments emitted by commentToMarkdown and emphasize.
var (
	mdNewline   = []byte("\n")
	mdHeader    = []byte("### ")
	mdIndent    = []byte("    ") // four spaces, as promised by CommentToMarkdown's doc
	mdLinkStart = []byte("[")
	mdLinkDiv   = []byte("](")
	mdLinkEnd   = []byte(")")
)

// commentToMarkdown writes the markdown rendering of text to w, one block
// (paragraph, heading or preformatted span) at a time. Every block after the
// first is preceded by a newline; headings and preformatted blocks are
// preceded by one additional newline.
//
// Write errors are ignored: CommentToMarkdown passes a *bytes.Buffer, whose
// Write never returns an error.
func commentToMarkdown(w io.Writer, text string) {
	isFirstLine := true
	for _, b := range blocks(text) {
		if !isFirstLine {
			w.Write(mdNewline)
		}

		switch b.op {
		case opPara:
			// Paragraph lines still carry their own trailing newline.
			for _, line := range b.lines {
				emphasize(w, line, true)
			}
		case opHead:
			w.Write(mdNewline)

			// Heading lines were trimmed by heading, so the newline has to
			// be written explicitly.
			for _, line := range b.lines {
				w.Write(mdHeader)
				commentEscape(w, line, true)
				w.Write(mdNewline)
			}
		case opPre:
			w.Write(mdNewline)

			for _, line := range b.lines {
				if isBlank(line) {
					w.Write(mdNewline)
					continue
				}
				// The line comes from strings.SplitAfter and therefore
				// already ends in "\n" (unless it is the very last line of
				// the comment). Writing another newline here would put a
				// blank line between every two lines of the code block.
				w.Write(mdIndent)
				io.WriteString(w, line)
			}
		}
		isFirstLine = false
	}
}

const (
	ulquo = "“"
	urquo = "”"
)

var (
	// markdownEscape matches every character that is backslash-escaped in
	// the markdown output (\x60 is the backquote).
	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)

	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
)

// commentEscape escapes comment text for markdown. If nice is set,
// also turn `` into “ and '' into ”.
func commentEscape(w io.Writer, text string, nice bool) {
	if nice {
		// Must run before escaping, which would otherwise put a backslash
		// in front of each backquote and apostrophe.
		text = convertQuotes(text)
	}
	text = escapeRegex(text)
	io.WriteString(w, text)
}

// convertQuotes replaces `` and '' with typographic double quotes.
func convertQuotes(text string) string {
	return unicodeQuoteReplacer.Replace(text)
}

// escapeRegex puts a backslash in front of every character matched by
// markdownEscape.
func escapeRegex(text string) string {
	return markdownEscape.ReplaceAllString(text, `\$1`)
}

// emphasize writes line to w escaped for markdown, turning every URL found
// by matchRx into a markdown link of the form [escaped-url](url).
// Identifiers are matched by matchRx as well but are written as ordinary
// escaped text. nice is passed through to commentEscape.
func emphasize(w io.Writer, line string, nice bool) {
	for {
		m := matchRx.FindStringSubmatchIndex(line)
		if m == nil {
			break
		}
		// len(m) >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)

		// write text before match
		commentEscape(w, line[0:m[0]], nice)

		// adjust match for URLs
		match := line[m[0]:m[1]]
		if strings.Contains(match, "://") {
			m0, m1 := m[0], m[1]
			for _, pair := range []string{"()", "{}", "[]"} {
				opener, closer := pair[:1], pair[1:] // E.g., "(" and ")"
				// require opening parentheses before closing parentheses (#22285)
				if i := strings.Index(match, closer); i >= 0 && i < strings.Index(match, opener) {
					m1 = m0 + i
					match = line[m0:m1]
				}
				// require balanced pairs of parentheses (#5043)
				for i := 0; strings.Count(match, opener) != strings.Count(match, closer) && i < 10; i++ {
					m1 = strings.LastIndexAny(line[:m1], pair)
					match = line[m0:m1]
				}
			}
			if m1 != m[1] {
				// redo matching with shortened line for correct indices
				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
			}
		}

		// Following code has been modified from go/doc since words is always
		// nil. All html formatting has also been transformed into markdown formatting

		// analyze match: m[2] >= 0 means the URL alternative matched
		url := ""
		if m[2] >= 0 {
			url = match
		}

		// write match
		if len(url) > 0 {
			w.Write(mdLinkStart)
		}

		commentEscape(w, match, nice)

		if len(url) > 0 {
			w.Write(mdLinkDiv)
			io.WriteString(w, urlReplacer.Replace(url))
			w.Write(mdLinkEnd)
		}

		// advance
		line = line[m[1]:]
	}
	commentEscape(w, line, nice)
}

// Everything from here on is a copy of go/doc/comment.go

const (
	// Regexp for Go identifiers
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check later for balance - see #5043, #22285
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid urls ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	urlRx = protoPart + `://` + hostPart + pathPart
)

var (
	// matchRx matches a URL (sub-match 1) or a Go identifier.
	matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
	// urlReplacer backslash-escapes parentheses inside a link target.
	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
)

// indentLen returns the number of leading space and tab bytes in s.
func indentLen(s string) int {
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	return i
}

// isBlank reports whether s is empty or consists of a single newline.
func isBlank(s string) bool {
	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
}

// commonPrefix returns the longest byte prefix shared by a and b.
func commonPrefix(a, b string) string {
	i := 0
	for i < len(a) && i < len(b) && a[i] == b[i] {
		i++
	}
	return a[0:i]
}

// unindent removes, in place, the longest whitespace prefix common to all
// non-blank lines of block. Blank lines are left untouched.
func unindent(block []string) {
	if len(block) == 0 {
		return
	}

	// compute maximum common white prefix
	prefix := block[0][0:indentLen(block[0])]
	for _, line := range block {
		if !isBlank(line) {
			prefix = commonPrefix(prefix, line[0:indentLen(line)])
		}
	}
	n := len(prefix)

	// remove
	for i, line := range block {
		if !isBlank(line) {
			block[i] = line[n:]
		}
	}
}

// heading returns the trimmed line if it passes as a section heading;
// otherwise it returns the empty string.
func heading(line string) string {
	line = strings.TrimSpace(line)
	if len(line) == 0 {
		return ""
	}

	// a heading must start with an uppercase letter
	r, _ := utf8.DecodeRuneInString(line)
	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
		return ""
	}

	// it must end in a letter or digit:
	r, _ = utf8.DecodeLastRuneInString(line)
	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
		return ""
	}

	// exclude lines with illegal characters. we allow "(),"
	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
		return ""
	}

	// allow "'" for possessive "'s" only
	for b := line; ; {
		i := strings.IndexRune(b, '\'')
		if i < 0 {
			break
		}
		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
			return "" // not followed by "s "
		}
		b = b[i+2:]
	}

	// allow "." when followed by non-space
	for b := line; ; {
		i := strings.IndexRune(b, '.')
		if i < 0 {
			break
		}
		if i+1 >= len(b) || b[i+1] == ' ' {
			return "" // not followed by non-space
		}
		b = b[i+1:]
	}

	return line
}

// op identifies the kind of a block.
type op int

const (
	opPara op = iota // paragraph
	opHead           // section heading
	opPre            // preformatted (indented) text
)

// block is a run of comment lines that share the same kind.
type block struct {
	op    op
	lines []string
}

// blocks splits text into paragraph, heading and preformatted blocks.
// Paragraph and preformatted lines keep the trailing newline they had in
// text; heading lines are the trimmed result of heading. Blank lines only
// separate blocks and are dropped, except inside a preformatted block.
func blocks(text string) []block {
	var (
		out  []block
		para []string

		lastWasBlank   = false
		lastWasHeading = false
	)

	// closePara flushes the paragraph collected so far, if any.
	closePara := func() {
		if para != nil {
			out = append(out, block{opPara, para})
			para = nil
		}
	}

	lines := strings.SplitAfter(text, "\n")
	unindent(lines)
	for i := 0; i < len(lines); {
		line := lines[i]
		if isBlank(line) {
			// close paragraph
			closePara()
			i++
			lastWasBlank = true
			continue
		}
		if indentLen(line) > 0 {
			// close paragraph
			closePara()

			// count indented or blank lines
			j := i + 1
			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
				j++
			}
			// but not trailing blank lines
			for j > i && isBlank(lines[j-1]) {
				j--
			}
			pre := lines[i:j]
			i = j

			unindent(pre)

			// put those lines in a pre block
			out = append(out, block{opPre, pre})
			lastWasHeading = false
			continue
		}

		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
			// current line is non-blank, surrounded by blank lines
			// and the next non-blank line is not indented: this
			// might be a heading.
			if head := heading(line); head != "" {
				closePara()
				out = append(out, block{opHead, []string{head}})
				i += 2
				lastWasHeading = true
				continue
			}
		}

		// open paragraph
		lastWasBlank = false
		lastWasHeading = false
		para = append(para, lines[i])
		i++
	}
	closePara()

	return out
}