// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Godoc comment extraction and comment -> HTML formatting.
//
// This code is part of package doc and uses the standard-library packages
// io, regexp, strings, text/template (for HTMLEscape), unicode and
// unicode/utf8.

// Replacement bytes written by commentEscape for `` and '' when nice is set.
var (
	ldquo = []byte("“")
	rdquo = []byte("”")
)

// commentEscape writes text to w, escaped for HTML. If nice is set,
// it also turns `` into “ and '' into ”.
// Errors returned by w are ignored.
func commentEscape(w io.Writer, text string, nice bool) {
	last := 0 // start of the text that has not been written yet
	if nice {
		for i := 0; i < len(text)-1; i++ {
			ch := text[i]
			if ch == text[i+1] && (ch == '`' || ch == '\'') {
				// flush everything before the doubled quote character
				template.HTMLEscape(w, []byte(text[last:i]))
				last = i + 2
				switch ch {
				case '`':
					w.Write(ldquo)
				case '\'':
					w.Write(rdquo)
				}
				i++ // skip the second quote; the loop adds one more
			}
		}
	}
	template.HTMLEscape(w, []byte(text[last:]))
}

const (
	// Regexp for Go identifiers
	identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this

	// Regexp for URLs
	protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
	hostPart = `[a-zA-Z0-9_@\-]+`
	filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
	urlRx    = protocol + `//` + // http://
		hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
		filePart + `([:.,]` + filePart + `)*`
)

// matchRx matches either a URL (first parenthesized group) or a Go identifier.
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)

// HTML fragments emitted by emphasize and ToHTML.
var (
	html_a      = []byte(`<a href="`)
	html_aq     = []byte(`">`)
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`)
	html_hq     = []byte(`">`)
	html_endh   = []byte("</h3>\n")
)

// emphasize writes a line of text to w, emphasized and escaped for HTML.
// URLs are converted into links; if the URL also appears in the words map,
// the link is taken from the map (if the corresponding map value is the
// empty string, the URL is not converted into a link). Go identifiers that
// appear in the words map are italicized; if the corresponding map value is
// not the empty string, it is considered a URL and the word is converted
// into a link. If nice is set, the remaining text's appearance is improved
// where it makes sense (e.g., `` is turned into “ and '' into ”).
func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
	for {
		m := matchRx.FindStringSubmatchIndex(line)
		if m == nil {
			break
		}
		// m[0], m[1] delimit the whole match; m[2] is the start of the
		// first parenthesized sub-regexp (urlRx), or -1 if it did not match.

		// write text before match
		commentEscape(w, line[:m[0]], nice)

		// analyze match; reading from a nil map is fine and yields ("", false)
		match := line[m[0]:m[1]]
		url, italics := words[match]
		if m[2] >= 0 {
			// match against first parenthesized sub-regexp; must be match against urlRx
			if !italics {
				// no alternative URL in words list, use match instead
				url = match
			}
			italics = false // don't italicize URLs
		}

		// write match
		if len(url) > 0 {
			w.Write(html_a)
			template.HTMLEscape(w, []byte(url))
			w.Write(html_aq)
		}
		if italics {
			w.Write(html_i)
		}
		commentEscape(w, match, nice)
		if italics {
			w.Write(html_endi)
		}
		if len(url) > 0 {
			w.Write(html_enda)
		}

		// advance
		line = line[m[1]:]
	}
	commentEscape(w, line, nice)
}

// indentLen returns the number of leading space and tab bytes of s.
func indentLen(s string) int {
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	return i
}

// isBlank reports whether s is empty or consists of a single newline.
func isBlank(s string) bool {
	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
}

// commonPrefix returns the longest common byte prefix of a and b.
func commonPrefix(a, b string) string {
	i := 0
	for i < len(a) && i < len(b) && a[i] == b[i] {
		i++
	}
	return a[:i]
}

// unindent removes, in place, the longest white space prefix shared by
// all non-blank lines of block. Blank lines are left untouched.
func unindent(block []string) {
	if len(block) == 0 {
		return
	}

	// compute maximum common white prefix
	prefix := block[0][:indentLen(block[0])]
	for _, line := range block {
		if !isBlank(line) {
			prefix = commonPrefix(prefix, line[:indentLen(line)])
		}
	}
	n := len(prefix)

	// remove
	for i, line := range block {
		if !isBlank(line) {
			block[i] = line[n:]
		}
	}
}

// heading returns the trimmed line if it passes as a section heading;
// otherwise it returns the empty string.
func heading(line string) string {
	line = strings.TrimSpace(line)
	if len(line) == 0 {
		return ""
	}

	// a heading must start with an uppercase letter
	r, _ := utf8.DecodeRuneInString(line)
	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
		return ""
	}

	// it must end in a letter or digit:
	r, _ = utf8.DecodeLastRuneInString(line)
	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
		return ""
	}

	// exclude lines with illegal characters
	if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
		return ""
	}

	// allow "'" for possessive "'s" only
	for b := line; ; {
		i := strings.IndexRune(b, '\'')
		if i < 0 {
			break
		}
		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
			return "" // not followed by "s "
		}
		b = b[i+2:]
	}

	return line
}

// op identifies the kind of a block.
type op int

const (
	opPara op = iota // paragraph of running text
	opHead           // section heading
	opPre            // preformatted (indented) text
)

// block is a run of comment lines that share one presentation kind.
type block struct {
	op    op
	lines []string
}

var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)

// anchorID returns the HTML id for the heading line: every character
// outside [a-zA-Z0-9] is replaced by an underscore.
func anchorID(line string) string {
	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
}

// ToHTML converts comment text to formatted HTML.
// The comment was prepared by DocReader,
// so it is known not to have leading, trailing blank lines
// nor to have trailing spaces at the end of lines.
// The comment markers have already been removed.
//
// Turn each run of multiple \n into </p><p>.
// Turn each run of indented lines into a <pre> block without indent.
// Enclose headings with header tags.
//
// URLs in the comment text are converted into links; if the URL also appears
// in the words map, the link is taken from the map (if the corresponding map
// value is the empty string, the URL is not converted into a link).
//
// Go identifiers that appear in the words map are italicized; if the corresponding
// map value is not the empty string, it is considered a URL and the word is converted
// into a link.
func ToHTML(w io.Writer, text string, words map[string]string) {
	for _, b := range blocks(text) {
		switch b.op {
		case opPara:
			w.Write(html_p)
			for _, line := range b.lines {
				emphasize(w, line, words, true)
			}
			w.Write(html_endp)
		case opHead:
			w.Write(html_h)
			id := ""
			for _, line := range b.lines {
				if id == "" {
					// the anchor id is derived from the first heading line
					id = anchorID(line)
					w.Write([]byte(id))
					w.Write(html_hq)
				}
				commentEscape(w, line, true)
			}
			if id == "" {
				// no lines at all: still close the opening tag
				w.Write(html_hq)
			}
			w.Write(html_endh)
		case opPre:
			w.Write(html_pre)
			for _, line := range b.lines {
				emphasize(w, line, nil, false)
			}
			w.Write(html_endpre)
		}
	}
}

// blocks splits text into paragraph, heading and preformatted blocks.
// Indented runs of lines (possibly with blank lines in between) become
// opPre blocks with their common indentation removed; a single non-blank
// line surrounded by blank lines that passes heading becomes an opHead
// block; everything else is collected into opPara blocks.
func blocks(text string) []block {
	var (
		out  []block
		para []string

		lastWasBlank   = false
		lastWasHeading = false
	)

	// closePara appends the pending paragraph, if any, to out.
	closePara := func() {
		if para != nil {
			out = append(out, block{op: opPara, lines: para})
			para = nil
		}
	}

	lines := strings.SplitAfter(text, "\n")
	unindent(lines)
	for i := 0; i < len(lines); {
		line := lines[i]
		if isBlank(line) {
			// close paragraph
			closePara()
			i++
			lastWasBlank = true
			continue
		}
		if indentLen(line) > 0 {
			// close paragraph
			closePara()

			// count indented or blank lines
			j := i + 1
			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
				j++
			}
			// but not trailing blank lines
			for j > i && isBlank(lines[j-1]) {
				j--
			}
			pre := lines[i:j]
			i = j

			unindent(pre)

			// put those lines in a pre block
			out = append(out, block{op: opPre, lines: pre})
			lastWasHeading = false
			continue
		}

		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
			// current line is non-blank, surrounded by blank lines
			// and the next non-blank line is not indented: this
			// might be a heading.
			if head := heading(line); head != "" {
				closePara()
				out = append(out, block{op: opHead, lines: []string{head}})
				i += 2
				lastWasHeading = true
				continue
			}
		}

		// open paragraph
		lastWasBlank = false
		lastWasHeading = false
		para = append(para, lines[i])
		i++
	}
	closePara()

	return out
}

// ToText prepares comment text for presentation in textual output.
// It wraps paragraphs of text to width or fewer Unicode code points
// and then prefixes each line with the indent. In preformatted sections
// (such as program text), it prefixes each non-blank line with preIndent;
// blank lines inside a preformatted section are kept as empty lines.
func ToText(w io.Writer, text string, indent, preIndent string, width int) {
	l := lineWrapper{
		out:    w,
		width:  width,
		indent: indent,
	}
	for _, b := range blocks(text) {
		switch b.op {
		case opPara:
			// l.write will add leading newline if required
			for _, line := range b.lines {
				l.write(line)
			}
			l.flush()
		case opHead:
			w.Write(nl)
			for _, line := range b.lines {
				l.write(line + "\n")
			}
			l.flush()
		case opPre:
			w.Write(nl)
			for _, line := range b.lines {
				if isBlank(line) {
					// keep the blank line so separate chunks of
					// preformatted text are not glued together
					w.Write(nl)
					continue
				}
				w.Write([]byte(preIndent))
				w.Write([]byte(line))
			}
		}
	}
}

// lineWrapper writes words to out, breaking lines so that their text
// does not exceed width code points, and starting each line with indent.
type lineWrapper struct {
	out       io.Writer
	printed   bool   // whether write has been called at least once
	width     int    // maximum line width, not counting indent
	indent    string // prefix of every output line
	n         int    // code points written on the current line
	pendSpace int    // spaces (0 or 1) to emit before the next word
}

var nl = []byte("\n")
var space = []byte(" ")

// write appends the white-space separated words of text to the output,
// wrapping lines as needed. If a previous paragraph was written and
// flushed, a blank line is emitted first.
func (l *lineWrapper) write(text string) {
	if l.n == 0 && l.printed {
		l.out.Write(nl) // blank line before new paragraph
	}
	l.printed = true

	for _, f := range strings.Fields(text) {
		w := utf8.RuneCountInString(f)
		// wrap if line is too long
		if l.n > 0 && l.n+l.pendSpace+w > l.width {
			l.out.Write(nl)
			l.n = 0
			l.pendSpace = 0
		}
		if l.n == 0 {
			l.out.Write([]byte(l.indent))
		}
		l.out.Write(space[:l.pendSpace])
		l.out.Write([]byte(f))
		l.n += l.pendSpace + w
		l.pendSpace = 1
	}
}

// flush terminates the current line, if any text was written on it.
func (l *lineWrapper) flush() {
	if l.n == 0 {
		return
	}
	l.out.Write(nl)
	l.pendSpace = 0
	l.n = 0
}