github.com/april1989/origin-go-tools@v0.0.32/internal/lsp/source/comment.go (about) 1 package source 2 3 import ( 4 "bytes" 5 "io" 6 "regexp" 7 "strings" 8 "unicode" 9 "unicode/utf8" 10 ) 11 12 // CommentToMarkdown converts comment text to formatted markdown. 13 // The comment was prepared by DocReader, 14 // so it is known not to have leading, trailing blank lines 15 // nor to have trailing spaces at the end of lines. 16 // The comment markers have already been removed. 17 // 18 // Each line is converted into a markdown line and empty lines are just converted to 19 // newlines. Heading are prefixed with `### ` to make it a markdown heading. 20 // 21 // A span of indented lines retains a 4 space prefix block, with the common indent 22 // prefix removed unless empty, in which case it will be converted to a newline. 23 // 24 // URLs in the comment text are converted into links. 25 func CommentToMarkdown(text string) string { 26 buf := &bytes.Buffer{} 27 commentToMarkdown(buf, text) 28 return buf.String() 29 } 30 31 var ( 32 mdNewline = []byte("\n") 33 mdHeader = []byte("### ") 34 mdIndent = []byte(" ") 35 mdLinkStart = []byte("[") 36 mdLinkDiv = []byte("](") 37 mdLinkEnd = []byte(")") 38 ) 39 40 func commentToMarkdown(w io.Writer, text string) { 41 isFirstLine := true 42 for _, b := range blocks(text) { 43 switch b.op { 44 case opPara: 45 if !isFirstLine { 46 w.Write(mdNewline) 47 } 48 49 for _, line := range b.lines { 50 emphasize(w, line, true) 51 } 52 case opHead: 53 if !isFirstLine { 54 w.Write(mdNewline) 55 } 56 w.Write(mdNewline) 57 58 for _, line := range b.lines { 59 w.Write(mdHeader) 60 commentEscape(w, line, true) 61 w.Write(mdNewline) 62 } 63 case opPre: 64 if !isFirstLine { 65 w.Write(mdNewline) 66 } 67 w.Write(mdNewline) 68 69 for _, line := range b.lines { 70 if isBlank(line) { 71 w.Write(mdNewline) 72 } else { 73 w.Write(mdIndent) 74 w.Write([]byte(line)) 75 w.Write(mdNewline) 76 } 77 } 78 } 79 isFirstLine = false 80 } 81 } 82 83 const ( 84 ulquo = "“" 85 urquo = "”" 86 ) 87 88 var ( 89 markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`) 90 91 unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo) 92 ) 93 94 // commentEscape escapes comment text for markdown. If nice is set, 95 // also turn `` into “; and '' into ”;. 96 func commentEscape(w io.Writer, text string, nice bool) { 97 if nice { 98 text = convertQuotes(text) 99 } 100 text = escapeRegex(text) 101 w.Write([]byte(text)) 102 } 103 104 func convertQuotes(text string) string { 105 return unicodeQuoteReplacer.Replace(text) 106 } 107 108 func escapeRegex(text string) string { 109 return markdownEscape.ReplaceAllString(text, `\$1`) 110 } 111 112 func emphasize(w io.Writer, line string, nice bool) { 113 for { 114 m := matchRx.FindStringSubmatchIndex(line) 115 if m == nil { 116 break 117 } 118 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) 119 120 // write text before match 121 commentEscape(w, line[0:m[0]], nice) 122 123 // adjust match for URLs 124 match := line[m[0]:m[1]] 125 if strings.Contains(match, "://") { 126 m0, m1 := m[0], m[1] 127 for _, s := range []string{"()", "{}", "[]"} { 128 open, close := s[:1], s[1:] // E.g., "(" and ")" 129 // require opening parentheses before closing parentheses (#22285) 130 if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) { 131 m1 = m0 + i 132 match = line[m0:m1] 133 } 134 // require balanced pairs of parentheses (#5043) 135 for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ { 136 m1 = strings.LastIndexAny(line[:m1], s) 137 match = line[m0:m1] 138 } 139 } 140 if m1 != m[1] { 141 // redo matching with shortened line for correct indices 142 m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)]) 143 } 144 } 145 146 // Following code has been modified from go/doc since words is always 147 // nil. All html formatting has also been transformed into markdown formatting 148 149 // analyze match 150 url := "" 151 if m[2] >= 0 { 152 url = match 153 } 154 155 // write match 156 if len(url) > 0 { 157 w.Write(mdLinkStart) 158 } 159 160 commentEscape(w, match, nice) 161 162 if len(url) > 0 { 163 w.Write(mdLinkDiv) 164 w.Write([]byte(urlReplacer.Replace(url))) 165 w.Write(mdLinkEnd) 166 } 167 168 // advance 169 line = line[m[1]:] 170 } 171 commentEscape(w, line, nice) 172 } 173 174 // Everything from here on is a copy of go/doc/comment.go 175 176 const ( 177 // Regexp for Go identifiers 178 identRx = `[\pL_][\pL_0-9]*` 179 180 // Regexp for URLs 181 // Match parens, and check later for balance - see #5043, #22285 182 // Match .,:;?! within path, but not at end - see #18139, #16565 183 // This excludes some rare yet valid urls ending in common punctuation 184 // in order to allow sentences ending in URLs. 185 186 // protocol (required) e.g. http 187 protoPart = `(https?|ftp|file|gopher|mailto|nntp)` 188 // host (required) e.g. www.example.com or [::1]:8080 189 hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)` 190 // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar 191 pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*` 192 193 urlRx = protoPart + `://` + hostPart + pathPart 194 ) 195 196 var ( 197 matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) 198 urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`) 199 ) 200 201 func indentLen(s string) int { 202 i := 0 203 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 204 i++ 205 } 206 return i 207 } 208 209 func isBlank(s string) bool { 210 return len(s) == 0 || (len(s) == 1 && s[0] == '\n') 211 } 212 213 func commonPrefix(a, b string) string { 214 i := 0 215 for i < len(a) && i < len(b) && a[i] == b[i] { 216 i++ 217 } 218 return a[0:i] 219 } 220 221 func unindent(block []string) { 222 if len(block) == 0 { 223 return 224 } 225 226 // compute maximum common white prefix 227 prefix := block[0][0:indentLen(block[0])] 228 for _, line := range block { 229 if !isBlank(line) { 230 prefix = commonPrefix(prefix, line[0:indentLen(line)]) 231 } 232 } 233 n := len(prefix) 234 235 // remove 236 for i, line := range block { 237 if !isBlank(line) { 238 block[i] = line[n:] 239 } 240 } 241 } 242 243 // heading returns the trimmed line if it passes as a section heading; 244 // otherwise it returns the empty string. 245 func heading(line string) string { 246 line = strings.TrimSpace(line) 247 if len(line) == 0 { 248 return "" 249 } 250 251 // a heading must start with an uppercase letter 252 r, _ := utf8.DecodeRuneInString(line) 253 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 254 return "" 255 } 256 257 // it must end in a letter or digit: 258 r, _ = utf8.DecodeLastRuneInString(line) 259 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 260 return "" 261 } 262 263 // exclude lines with illegal characters. we allow "()," 264 if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { 265 return "" 266 } 267 268 // allow "'" for possessive "'s" only 269 for b := line; ; { 270 i := strings.IndexRune(b, '\'') 271 if i < 0 { 272 break 273 } 274 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { 275 return "" // not followed by "s " 276 } 277 b = b[i+2:] 278 } 279 280 // allow "." when followed by non-space 281 for b := line; ; { 282 i := strings.IndexRune(b, '.') 283 if i < 0 { 284 break 285 } 286 if i+1 >= len(b) || b[i+1] == ' ' { 287 return "" // not followed by non-space 288 } 289 b = b[i+1:] 290 } 291 292 return line 293 } 294 295 type op int 296 297 const ( 298 opPara op = iota 299 opHead 300 opPre 301 ) 302 303 type block struct { 304 op op 305 lines []string 306 } 307 308 func blocks(text string) []block { 309 var ( 310 out []block 311 para []string 312 313 lastWasBlank = false 314 lastWasHeading = false 315 ) 316 317 close := func() { 318 if para != nil { 319 out = append(out, block{opPara, para}) 320 para = nil 321 } 322 } 323 324 lines := strings.SplitAfter(text, "\n") 325 unindent(lines) 326 for i := 0; i < len(lines); { 327 line := lines[i] 328 if isBlank(line) { 329 // close paragraph 330 close() 331 i++ 332 lastWasBlank = true 333 continue 334 } 335 if indentLen(line) > 0 { 336 // close paragraph 337 close() 338 339 // count indented or blank lines 340 j := i + 1 341 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { 342 j++ 343 } 344 // but not trailing blank lines 345 for j > i && isBlank(lines[j-1]) { 346 j-- 347 } 348 pre := lines[i:j] 349 i = j 350 351 unindent(pre) 352 353 // put those lines in a pre block 354 out = append(out, block{opPre, pre}) 355 lastWasHeading = false 356 continue 357 } 358 359 if lastWasBlank && !lastWasHeading && i+2 < len(lines) && 360 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { 361 // current line is non-blank, surrounded by blank lines 362 // and the next non-blank line is not indented: this 363 // might be a heading. 364 if head := heading(line); head != "" { 365 close() 366 out = append(out, block{opHead, []string{head}}) 367 i += 2 368 lastWasHeading = true 369 continue 370 } 371 } 372 373 // open paragraph 374 lastWasBlank = false 375 lastWasHeading = false 376 para = append(para, lines[i]) 377 i++ 378 } 379 close() 380 381 return out 382 }