github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/go/doc/comment.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Godoc comment extraction and comment -> HTML formatting. 6 7 package doc 8 9 import ( 10 "io" 11 "regexp" 12 "strings" 13 "text/template" // for HTMLEscape 14 "unicode" 15 "unicode/utf8" 16 ) 17 18 var ( 19 ldquo = []byte("“") 20 rdquo = []byte("”") 21 ) 22 23 // Escape comment text for HTML. If nice is set, 24 // also turn `` into “ and '' into ”. 25 func commentEscape(w io.Writer, text string, nice bool) { 26 last := 0 27 if nice { 28 for i := 0; i < len(text)-1; i++ { 29 ch := text[i] 30 if ch == text[i+1] && (ch == '`' || ch == '\'') { 31 template.HTMLEscape(w, []byte(text[last:i])) 32 last = i + 2 33 switch ch { 34 case '`': 35 w.Write(ldquo) 36 case '\'': 37 w.Write(rdquo) 38 } 39 i++ // loop will add one more 40 } 41 } 42 } 43 template.HTMLEscape(w, []byte(text[last:])) 44 } 45 46 const ( 47 // Regexp for Go identifiers 48 identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this 49 50 // Regexp for URLs 51 protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` 52 hostPart = `[a-zA-Z0-9_@\-]+` 53 filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` 54 urlRx = protocol + `//` + // http:// 55 hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ 56 filePart + `([:.,]` + filePart + `)*` 57 ) 58 59 var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) 60 61 var ( 62 html_a = []byte(`<a href="`) 63 html_aq = []byte(`">`) 64 html_enda = []byte("</a>") 65 html_i = []byte("<i>") 66 html_endi = []byte("</i>") 67 html_p = []byte("<p>\n") 68 html_endp = []byte("</p>\n") 69 html_pre = []byte("<pre>") 70 html_endpre = []byte("</pre>\n") 71 html_h = []byte(`<h3 id="`) 72 html_hq = []byte(`">`) 73 html_endh = []byte("</h3>\n") 74 ) 75 76 // Emphasize and escape a line of text for HTML. URLs are converted into links; 77 // if the URL also appears in the words map, the link is taken from the map (if 78 // the corresponding map value is the empty string, the URL is not converted 79 // into a link). Go identifiers that appear in the words map are italicized; if 80 // the corresponding map value is not the empty string, it is considered a URL 81 // and the word is converted into a link. If nice is set, the remaining text's 82 // appearance is improved where it makes sense (e.g., `` is turned into “ 83 // and '' into ”). 84 func emphasize(w io.Writer, line string, words map[string]string, nice bool) { 85 for { 86 m := matchRx.FindStringSubmatchIndex(line) 87 if m == nil { 88 break 89 } 90 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) 91 92 // write text before match 93 commentEscape(w, line[0:m[0]], nice) 94 95 // analyze match 96 match := line[m[0]:m[1]] 97 url := "" 98 italics := false 99 if words != nil { 100 url, italics = words[string(match)] 101 } 102 if m[2] >= 0 { 103 // match against first parenthesized sub-regexp; must be match against urlRx 104 if !italics { 105 // no alternative URL in words list, use match instead 106 url = string(match) 107 } 108 italics = false // don't italicize URLs 109 } 110 111 // write match 112 if len(url) > 0 { 113 w.Write(html_a) 114 template.HTMLEscape(w, []byte(url)) 115 w.Write(html_aq) 116 } 117 if italics { 118 w.Write(html_i) 119 } 120 commentEscape(w, match, nice) 121 if italics { 122 w.Write(html_endi) 123 } 124 if len(url) > 0 { 125 w.Write(html_enda) 126 } 127 128 // advance 129 line = line[m[1]:] 130 } 131 commentEscape(w, line, nice) 132 } 133 134 func indentLen(s string) int { 135 i := 0 136 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 137 i++ 138 } 139 return i 140 } 141 142 func isBlank(s string) bool { 143 return len(s) == 0 || (len(s) == 1 && s[0] == '\n') 144 } 145 146 func commonPrefix(a, b string) string { 147 i := 0 148 for i < len(a) && i < len(b) && a[i] == b[i] { 149 i++ 150 } 151 return a[0:i] 152 } 153 154 func unindent(block []string) { 155 if len(block) == 0 { 156 return 157 } 158 159 // compute maximum common white prefix 160 prefix := block[0][0:indentLen(block[0])] 161 for _, line := range block { 162 if !isBlank(line) { 163 prefix = commonPrefix(prefix, line[0:indentLen(line)]) 164 } 165 } 166 n := len(prefix) 167 168 // remove 169 for i, line := range block { 170 if !isBlank(line) { 171 block[i] = line[n:] 172 } 173 } 174 } 175 176 // heading returns the trimmed line if it passes as a section heading; 177 // otherwise it returns the empty string. 178 func heading(line string) string { 179 line = strings.TrimSpace(line) 180 if len(line) == 0 { 181 return "" 182 } 183 184 // a heading must start with an uppercase letter 185 r, _ := utf8.DecodeRuneInString(line) 186 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 187 return "" 188 } 189 190 // it must end in a letter or digit: 191 r, _ = utf8.DecodeLastRuneInString(line) 192 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 193 return "" 194 } 195 196 // exclude lines with illegal characters 197 if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 { 198 return "" 199 } 200 201 // allow "'" for possessive "'s" only 202 for b := line; ; { 203 i := strings.IndexRune(b, '\'') 204 if i < 0 { 205 break 206 } 207 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { 208 return "" // not followed by "s " 209 } 210 b = b[i+2:] 211 } 212 213 return line 214 } 215 216 type op int 217 218 const ( 219 opPara op = iota 220 opHead 221 opPre 222 ) 223 224 type block struct { 225 op op 226 lines []string 227 } 228 229 var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`) 230 231 func anchorID(line string) string { 232 // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols. 233 return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_") 234 } 235 236 // ToHTML converts comment text to formatted HTML. 237 // The comment was prepared by DocReader, 238 // so it is known not to have leading, trailing blank lines 239 // nor to have trailing spaces at the end of lines. 240 // The comment markers have already been removed. 241 // 242 // Each span of unindented non-blank lines is converted into 243 // a single paragraph. There is one exception to the rule: a span that 244 // consists of a single line, is followed by another paragraph span, 245 // begins with a capital letter, and contains no punctuation 246 // is formatted as a heading. 247 // 248 // A span of indented lines is converted into a <pre> block, 249 // with the common indent prefix removed. 250 // 251 // URLs in the comment text are converted into links; if the URL also appears 252 // in the words map, the link is taken from the map (if the corresponding map 253 // value is the empty string, the URL is not converted into a link). 254 // 255 // Go identifiers that appear in the words map are italicized; if the corresponding 256 // map value is not the empty string, it is considered a URL and the word is converted 257 // into a link. 258 func ToHTML(w io.Writer, text string, words map[string]string) { 259 for _, b := range blocks(text) { 260 switch b.op { 261 case opPara: 262 w.Write(html_p) 263 for _, line := range b.lines { 264 emphasize(w, line, words, true) 265 } 266 w.Write(html_endp) 267 case opHead: 268 w.Write(html_h) 269 id := "" 270 for _, line := range b.lines { 271 if id == "" { 272 id = anchorID(line) 273 w.Write([]byte(id)) 274 w.Write(html_hq) 275 } 276 commentEscape(w, line, true) 277 } 278 if id == "" { 279 w.Write(html_hq) 280 } 281 w.Write(html_endh) 282 case opPre: 283 w.Write(html_pre) 284 for _, line := range b.lines { 285 emphasize(w, line, nil, false) 286 } 287 w.Write(html_endpre) 288 } 289 } 290 } 291 292 func blocks(text string) []block { 293 var ( 294 out []block 295 para []string 296 297 lastWasBlank = false 298 lastWasHeading = false 299 ) 300 301 close := func() { 302 if para != nil { 303 out = append(out, block{opPara, para}) 304 para = nil 305 } 306 } 307 308 lines := strings.SplitAfter(text, "\n") 309 unindent(lines) 310 for i := 0; i < len(lines); { 311 line := lines[i] 312 if isBlank(line) { 313 // close paragraph 314 close() 315 i++ 316 lastWasBlank = true 317 continue 318 } 319 if indentLen(line) > 0 { 320 // close paragraph 321 close() 322 323 // count indented or blank lines 324 j := i + 1 325 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { 326 j++ 327 } 328 // but not trailing blank lines 329 for j > i && isBlank(lines[j-1]) { 330 j-- 331 } 332 pre := lines[i:j] 333 i = j 334 335 unindent(pre) 336 337 // put those lines in a pre block 338 out = append(out, block{opPre, pre}) 339 lastWasHeading = false 340 continue 341 } 342 343 if lastWasBlank && !lastWasHeading && i+2 < len(lines) && 344 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { 345 // current line is non-blank, surrounded by blank lines 346 // and the next non-blank line is not indented: this 347 // might be a heading. 348 if head := heading(line); head != "" { 349 close() 350 out = append(out, block{opHead, []string{head}}) 351 i += 2 352 lastWasHeading = true 353 continue 354 } 355 } 356 357 // open paragraph 358 lastWasBlank = false 359 lastWasHeading = false 360 para = append(para, lines[i]) 361 i++ 362 } 363 close() 364 365 return out 366 } 367 368 // ToText prepares comment text for presentation in textual output. 369 // It wraps paragraphs of text to width or fewer Unicode code points 370 // and then prefixes each line with the indent. In preformatted sections 371 // (such as program text), it prefixes each non-blank line with preIndent. 372 func ToText(w io.Writer, text string, indent, preIndent string, width int) { 373 l := lineWrapper{ 374 out: w, 375 width: width, 376 indent: indent, 377 } 378 for _, b := range blocks(text) { 379 switch b.op { 380 case opPara: 381 // l.write will add leading newline if required 382 for _, line := range b.lines { 383 l.write(line) 384 } 385 l.flush() 386 case opHead: 387 w.Write(nl) 388 for _, line := range b.lines { 389 l.write(line + "\n") 390 } 391 l.flush() 392 case opPre: 393 w.Write(nl) 394 for _, line := range b.lines { 395 if !isBlank(line) { 396 w.Write([]byte(preIndent)) 397 w.Write([]byte(line)) 398 } 399 } 400 } 401 } 402 } 403 404 type lineWrapper struct { 405 out io.Writer 406 printed bool 407 width int 408 indent string 409 n int 410 pendSpace int 411 } 412 413 var nl = []byte("\n") 414 var space = []byte(" ") 415 416 func (l *lineWrapper) write(text string) { 417 if l.n == 0 && l.printed { 418 l.out.Write(nl) // blank line before new paragraph 419 } 420 l.printed = true 421 422 for _, f := range strings.Fields(text) { 423 w := utf8.RuneCountInString(f) 424 // wrap if line is too long 425 if l.n > 0 && l.n+l.pendSpace+w > l.width { 426 l.out.Write(nl) 427 l.n = 0 428 l.pendSpace = 0 429 } 430 if l.n == 0 { 431 l.out.Write([]byte(l.indent)) 432 } 433 l.out.Write(space[:l.pendSpace]) 434 l.out.Write([]byte(f)) 435 l.n += l.pendSpace + w 436 l.pendSpace = 1 437 } 438 } 439 440 func (l *lineWrapper) flush() { 441 if l.n == 0 { 442 return 443 } 444 l.out.Write(nl) 445 l.pendSpace = 0 446 l.n = 0 447 }