github.com/shijuvar/go@v0.0.0-20141209052335-e8f13700b70c/src/go/doc/comment.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Godoc comment extraction and comment -> HTML formatting. 6 7 package doc 8 9 import ( 10 "io" 11 "regexp" 12 "strings" 13 "text/template" // for HTMLEscape 14 "unicode" 15 "unicode/utf8" 16 ) 17 18 var ( 19 ldquo = []byte("“") 20 rdquo = []byte("”") 21 ) 22 23 // Escape comment text for HTML. If nice is set, 24 // also turn `` into “ and '' into ”. 25 func commentEscape(w io.Writer, text string, nice bool) { 26 last := 0 27 if nice { 28 for i := 0; i < len(text)-1; i++ { 29 ch := text[i] 30 if ch == text[i+1] && (ch == '`' || ch == '\'') { 31 template.HTMLEscape(w, []byte(text[last:i])) 32 last = i + 2 33 switch ch { 34 case '`': 35 w.Write(ldquo) 36 case '\'': 37 w.Write(rdquo) 38 } 39 i++ // loop will add one more 40 } 41 } 42 } 43 template.HTMLEscape(w, []byte(text[last:])) 44 } 45 46 const ( 47 // Regexp for Go identifiers 48 identRx = `[\pL_][\pL_0-9]*` 49 50 // Regexp for URLs 51 protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero` 52 hostPart = `[a-zA-Z0-9_@\-]+` 53 filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen 54 urlRx = `(` + protocol + `)://` + // http:// 55 hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ 56 filePart + `([:.,]` + filePart + `)*` 57 ) 58 59 var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) 60 61 var ( 62 html_a = []byte(`<a href="`) 63 html_aq = []byte(`">`) 64 html_enda = []byte("</a>") 65 html_i = []byte("<i>") 66 html_endi = []byte("</i>") 67 html_p = []byte("<p>\n") 68 html_endp = []byte("</p>\n") 69 html_pre = []byte("<pre>") 70 html_endpre = []byte("</pre>\n") 71 html_h = []byte(`<h3 id="`) 72 html_hq = []byte(`">`) 73 html_endh = []byte("</h3>\n") 74 ) 75 76 // pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses. 77 func pairedParensPrefixLen(s string) int { 78 parens := 0 79 l := len(s) 80 for i, ch := range s { 81 switch ch { 82 case '(': 83 if parens == 0 { 84 l = i 85 } 86 parens++ 87 case ')': 88 parens-- 89 if parens == 0 { 90 l = len(s) 91 } else if parens < 0 { 92 return i 93 } 94 } 95 } 96 return l 97 } 98 99 // Emphasize and escape a line of text for HTML. URLs are converted into links; 100 // if the URL also appears in the words map, the link is taken from the map (if 101 // the corresponding map value is the empty string, the URL is not converted 102 // into a link). Go identifiers that appear in the words map are italicized; if 103 // the corresponding map value is not the empty string, it is considered a URL 104 // and the word is converted into a link. If nice is set, the remaining text's 105 // appearance is improved where it makes sense (e.g., `` is turned into “ 106 // and '' into ”). 107 func emphasize(w io.Writer, line string, words map[string]string, nice bool) { 108 for { 109 m := matchRx.FindStringSubmatchIndex(line) 110 if m == nil { 111 break 112 } 113 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) 114 115 // write text before match 116 commentEscape(w, line[0:m[0]], nice) 117 118 // adjust match if necessary 119 match := line[m[0]:m[1]] 120 if n := pairedParensPrefixLen(match); n < len(match) { 121 // match contains unpaired parentheses (rare); 122 // redo matching with shortened line for correct indices 123 m = matchRx.FindStringSubmatchIndex(line[:m[0]+n]) 124 match = match[:n] 125 } 126 127 // analyze match 128 url := "" 129 italics := false 130 if words != nil { 131 url, italics = words[match] 132 } 133 if m[2] >= 0 { 134 // match against first parenthesized sub-regexp; must be match against urlRx 135 if !italics { 136 // no alternative URL in words list, use match instead 137 url = match 138 } 139 italics = false // don't italicize URLs 140 } 141 142 // write match 143 if len(url) > 0 { 144 w.Write(html_a) 145 template.HTMLEscape(w, []byte(url)) 146 w.Write(html_aq) 147 } 148 if italics { 149 w.Write(html_i) 150 } 151 commentEscape(w, match, nice) 152 if italics { 153 w.Write(html_endi) 154 } 155 if len(url) > 0 { 156 w.Write(html_enda) 157 } 158 159 // advance 160 line = line[m[1]:] 161 } 162 commentEscape(w, line, nice) 163 } 164 165 func indentLen(s string) int { 166 i := 0 167 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 168 i++ 169 } 170 return i 171 } 172 173 func isBlank(s string) bool { 174 return len(s) == 0 || (len(s) == 1 && s[0] == '\n') 175 } 176 177 func commonPrefix(a, b string) string { 178 i := 0 179 for i < len(a) && i < len(b) && a[i] == b[i] { 180 i++ 181 } 182 return a[0:i] 183 } 184 185 func unindent(block []string) { 186 if len(block) == 0 { 187 return 188 } 189 190 // compute maximum common white prefix 191 prefix := block[0][0:indentLen(block[0])] 192 for _, line := range block { 193 if !isBlank(line) { 194 prefix = commonPrefix(prefix, line[0:indentLen(line)]) 195 } 196 } 197 n := len(prefix) 198 199 // remove 200 for i, line := range block { 201 if !isBlank(line) { 202 block[i] = line[n:] 203 } 204 } 205 } 206 207 // heading returns the trimmed line if it passes as a section heading; 208 // otherwise it returns the empty string. 209 func heading(line string) string { 210 line = strings.TrimSpace(line) 211 if len(line) == 0 { 212 return "" 213 } 214 215 // a heading must start with an uppercase letter 216 r, _ := utf8.DecodeRuneInString(line) 217 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 218 return "" 219 } 220 221 // it must end in a letter or digit: 222 r, _ = utf8.DecodeLastRuneInString(line) 223 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 224 return "" 225 } 226 227 // exclude lines with illegal characters 228 if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 { 229 return "" 230 } 231 232 // allow "'" for possessive "'s" only 233 for b := line; ; { 234 i := strings.IndexRune(b, '\'') 235 if i < 0 { 236 break 237 } 238 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { 239 return "" // not followed by "s " 240 } 241 b = b[i+2:] 242 } 243 244 return line 245 } 246 247 type op int 248 249 const ( 250 opPara op = iota 251 opHead 252 opPre 253 ) 254 255 type block struct { 256 op op 257 lines []string 258 } 259 260 var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`) 261 262 func anchorID(line string) string { 263 // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols. 264 return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_") 265 } 266 267 // ToHTML converts comment text to formatted HTML. 268 // The comment was prepared by DocReader, 269 // so it is known not to have leading, trailing blank lines 270 // nor to have trailing spaces at the end of lines. 271 // The comment markers have already been removed. 272 // 273 // Each span of unindented non-blank lines is converted into 274 // a single paragraph. There is one exception to the rule: a span that 275 // consists of a single line, is followed by another paragraph span, 276 // begins with a capital letter, and contains no punctuation 277 // is formatted as a heading. 278 // 279 // A span of indented lines is converted into a <pre> block, 280 // with the common indent prefix removed. 281 // 282 // URLs in the comment text are converted into links; if the URL also appears 283 // in the words map, the link is taken from the map (if the corresponding map 284 // value is the empty string, the URL is not converted into a link). 285 // 286 // Go identifiers that appear in the words map are italicized; if the corresponding 287 // map value is not the empty string, it is considered a URL and the word is converted 288 // into a link. 289 func ToHTML(w io.Writer, text string, words map[string]string) { 290 for _, b := range blocks(text) { 291 switch b.op { 292 case opPara: 293 w.Write(html_p) 294 for _, line := range b.lines { 295 emphasize(w, line, words, true) 296 } 297 w.Write(html_endp) 298 case opHead: 299 w.Write(html_h) 300 id := "" 301 for _, line := range b.lines { 302 if id == "" { 303 id = anchorID(line) 304 w.Write([]byte(id)) 305 w.Write(html_hq) 306 } 307 commentEscape(w, line, true) 308 } 309 if id == "" { 310 w.Write(html_hq) 311 } 312 w.Write(html_endh) 313 case opPre: 314 w.Write(html_pre) 315 for _, line := range b.lines { 316 emphasize(w, line, nil, false) 317 } 318 w.Write(html_endpre) 319 } 320 } 321 } 322 323 func blocks(text string) []block { 324 var ( 325 out []block 326 para []string 327 328 lastWasBlank = false 329 lastWasHeading = false 330 ) 331 332 close := func() { 333 if para != nil { 334 out = append(out, block{opPara, para}) 335 para = nil 336 } 337 } 338 339 lines := strings.SplitAfter(text, "\n") 340 unindent(lines) 341 for i := 0; i < len(lines); { 342 line := lines[i] 343 if isBlank(line) { 344 // close paragraph 345 close() 346 i++ 347 lastWasBlank = true 348 continue 349 } 350 if indentLen(line) > 0 { 351 // close paragraph 352 close() 353 354 // count indented or blank lines 355 j := i + 1 356 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { 357 j++ 358 } 359 // but not trailing blank lines 360 for j > i && isBlank(lines[j-1]) { 361 j-- 362 } 363 pre := lines[i:j] 364 i = j 365 366 unindent(pre) 367 368 // put those lines in a pre block 369 out = append(out, block{opPre, pre}) 370 lastWasHeading = false 371 continue 372 } 373 374 if lastWasBlank && !lastWasHeading && i+2 < len(lines) && 375 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { 376 // current line is non-blank, surrounded by blank lines 377 // and the next non-blank line is not indented: this 378 // might be a heading. 379 if head := heading(line); head != "" { 380 close() 381 out = append(out, block{opHead, []string{head}}) 382 i += 2 383 lastWasHeading = true 384 continue 385 } 386 } 387 388 // open paragraph 389 lastWasBlank = false 390 lastWasHeading = false 391 para = append(para, lines[i]) 392 i++ 393 } 394 close() 395 396 return out 397 } 398 399 // ToText prepares comment text for presentation in textual output. 400 // It wraps paragraphs of text to width or fewer Unicode code points 401 // and then prefixes each line with the indent. In preformatted sections 402 // (such as program text), it prefixes each non-blank line with preIndent. 403 func ToText(w io.Writer, text string, indent, preIndent string, width int) { 404 l := lineWrapper{ 405 out: w, 406 width: width, 407 indent: indent, 408 } 409 for _, b := range blocks(text) { 410 switch b.op { 411 case opPara: 412 // l.write will add leading newline if required 413 for _, line := range b.lines { 414 l.write(line) 415 } 416 l.flush() 417 case opHead: 418 w.Write(nl) 419 for _, line := range b.lines { 420 l.write(line + "\n") 421 } 422 l.flush() 423 case opPre: 424 w.Write(nl) 425 for _, line := range b.lines { 426 if isBlank(line) { 427 w.Write([]byte("\n")) 428 } else { 429 w.Write([]byte(preIndent)) 430 w.Write([]byte(line)) 431 } 432 } 433 } 434 } 435 } 436 437 type lineWrapper struct { 438 out io.Writer 439 printed bool 440 width int 441 indent string 442 n int 443 pendSpace int 444 } 445 446 var nl = []byte("\n") 447 var space = []byte(" ") 448 449 func (l *lineWrapper) write(text string) { 450 if l.n == 0 && l.printed { 451 l.out.Write(nl) // blank line before new paragraph 452 } 453 l.printed = true 454 455 for _, f := range strings.Fields(text) { 456 w := utf8.RuneCountInString(f) 457 // wrap if line is too long 458 if l.n > 0 && l.n+l.pendSpace+w > l.width { 459 l.out.Write(nl) 460 l.n = 0 461 l.pendSpace = 0 462 } 463 if l.n == 0 { 464 l.out.Write([]byte(l.indent)) 465 } 466 l.out.Write(space[:l.pendSpace]) 467 l.out.Write([]byte(f)) 468 l.n += l.pendSpace + w 469 l.pendSpace = 1 470 } 471 } 472 473 func (l *lineWrapper) flush() { 474 if l.n == 0 { 475 return 476 } 477 l.out.Write(nl) 478 l.pendSpace = 0 479 l.n = 0 480 }