github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/go/doc/comment.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Godoc comment extraction and comment -> HTML formatting. 6 7 package doc 8 9 import ( 10 "bytes" 11 "io" 12 "strings" 13 "text/template" // for HTMLEscape 14 "unicode" 15 "unicode/utf8" 16 ) 17 18 const ( 19 ldquo = "“" 20 rdquo = "”" 21 ulquo = "“" 22 urquo = "”" 23 ) 24 25 var ( 26 htmlQuoteReplacer = strings.NewReplacer(ulquo, ldquo, urquo, rdquo) 27 unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo) 28 ) 29 30 // Escape comment text for HTML. If nice is set, 31 // also turn `` into “ and '' into ”. 32 func commentEscape(w io.Writer, text string, nice bool) { 33 if nice { 34 // In the first pass, we convert `` and '' into their unicode equivalents. 35 // This prevents them from being escaped in HTMLEscape. 36 text = convertQuotes(text) 37 var buf bytes.Buffer 38 template.HTMLEscape(&buf, []byte(text)) 39 // Now we convert the unicode quotes to their HTML escaped entities to maintain old behavior. 40 // We need to use a temp buffer to read the string back and do the conversion, 41 // otherwise HTMLEscape will escape & to & 42 htmlQuoteReplacer.WriteString(w, buf.String()) 43 return 44 } 45 template.HTMLEscape(w, []byte(text)) 46 } 47 48 func convertQuotes(text string) string { 49 return unicodeQuoteReplacer.Replace(text) 50 } 51 52 const ( 53 // Regexp for Go identifiers 54 identRx = `[\pL_][\pL_0-9]*` 55 56 // Regexp for URLs 57 // Match parens, and check later for balance - see #5043, #22285 58 // Match .,:;?! within path, but not at end - see #18139, #16565 59 // This excludes some rare yet valid urls ending in common punctuation 60 // in order to allow sentences ending in URLs. 61 62 // protocol (required) e.g. http 63 protoPart = `(https?|ftp|file|gopher|mailto|nntp)` 64 // host (required) e.g. www.example.com or [::1]:8080 65 hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)` 66 // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar 67 pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*` 68 69 urlRx = protoPart + `://` + hostPart + pathPart 70 ) 71 72 var matchRx = newLazyRE(`(` + urlRx + `)|(` + identRx + `)`) 73 74 var ( 75 html_a = []byte(`<a href="`) 76 html_aq = []byte(`">`) 77 html_enda = []byte("</a>") 78 html_i = []byte("<i>") 79 html_endi = []byte("</i>") 80 html_p = []byte("<p>\n") 81 html_endp = []byte("</p>\n") 82 html_pre = []byte("<pre>") 83 html_endpre = []byte("</pre>\n") 84 html_h = []byte(`<h3 id="`) 85 html_hq = []byte(`">`) 86 html_endh = []byte("</h3>\n") 87 ) 88 89 // Emphasize and escape a line of text for HTML. URLs are converted into links; 90 // if the URL also appears in the words map, the link is taken from the map (if 91 // the corresponding map value is the empty string, the URL is not converted 92 // into a link). Go identifiers that appear in the words map are italicized; if 93 // the corresponding map value is not the empty string, it is considered a URL 94 // and the word is converted into a link. If nice is set, the remaining text's 95 // appearance is improved where it makes sense (e.g., `` is turned into “ 96 // and '' into ”). 97 func emphasize(w io.Writer, line string, words map[string]string, nice bool) { 98 for { 99 m := matchRx.FindStringSubmatchIndex(line) 100 if m == nil { 101 break 102 } 103 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) 104 105 // write text before match 106 commentEscape(w, line[0:m[0]], nice) 107 108 // adjust match for URLs 109 match := line[m[0]:m[1]] 110 if strings.Contains(match, "://") { 111 m0, m1 := m[0], m[1] 112 for _, s := range []string{"()", "{}", "[]"} { 113 open, close := s[:1], s[1:] // E.g., "(" and ")" 114 // require opening parentheses before closing parentheses (#22285) 115 if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) { 116 m1 = m0 + i 117 match = line[m0:m1] 118 } 119 // require balanced pairs of parentheses (#5043) 120 for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ { 121 m1 = strings.LastIndexAny(line[:m1], s) 122 match = line[m0:m1] 123 } 124 } 125 if m1 != m[1] { 126 // redo matching with shortened line for correct indices 127 m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)]) 128 } 129 } 130 131 // analyze match 132 url := "" 133 italics := false 134 if words != nil { 135 url, italics = words[match] 136 } 137 if m[2] >= 0 { 138 // match against first parenthesized sub-regexp; must be match against urlRx 139 if !italics { 140 // no alternative URL in words list, use match instead 141 url = match 142 } 143 italics = false // don't italicize URLs 144 } 145 146 // write match 147 if len(url) > 0 { 148 w.Write(html_a) 149 template.HTMLEscape(w, []byte(url)) 150 w.Write(html_aq) 151 } 152 if italics { 153 w.Write(html_i) 154 } 155 commentEscape(w, match, nice) 156 if italics { 157 w.Write(html_endi) 158 } 159 if len(url) > 0 { 160 w.Write(html_enda) 161 } 162 163 // advance 164 line = line[m[1]:] 165 } 166 commentEscape(w, line, nice) 167 } 168 169 func indentLen(s string) int { 170 i := 0 171 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 172 i++ 173 } 174 return i 175 } 176 177 func isBlank(s string) bool { 178 return len(s) == 0 || (len(s) == 1 && s[0] == '\n') 179 } 180 181 func commonPrefix(a, b string) string { 182 i := 0 183 for i < len(a) && i < len(b) && a[i] == b[i] { 184 i++ 185 } 186 return a[0:i] 187 } 188 189 func unindent(block []string) { 190 if len(block) == 0 { 191 return 192 } 193 194 // compute maximum common white prefix 195 prefix := block[0][0:indentLen(block[0])] 196 for _, line := range block { 197 if !isBlank(line) { 198 prefix = commonPrefix(prefix, line[0:indentLen(line)]) 199 } 200 } 201 n := len(prefix) 202 203 // remove 204 for i, line := range block { 205 if !isBlank(line) { 206 block[i] = line[n:] 207 } 208 } 209 } 210 211 // heading returns the trimmed line if it passes as a section heading; 212 // otherwise it returns the empty string. 213 func heading(line string) string { 214 line = strings.TrimSpace(line) 215 if len(line) == 0 { 216 return "" 217 } 218 219 // a heading must start with an uppercase letter 220 r, _ := utf8.DecodeRuneInString(line) 221 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 222 return "" 223 } 224 225 // it must end in a letter or digit: 226 r, _ = utf8.DecodeLastRuneInString(line) 227 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 228 return "" 229 } 230 231 // exclude lines with illegal characters. we allow "()," 232 if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { 233 return "" 234 } 235 236 // allow "'" for possessive "'s" only 237 for b := line; ; { 238 i := strings.IndexRune(b, '\'') 239 if i < 0 { 240 break 241 } 242 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { 243 return "" // not followed by "s " 244 } 245 b = b[i+2:] 246 } 247 248 // allow "." when followed by non-space 249 for b := line; ; { 250 i := strings.IndexRune(b, '.') 251 if i < 0 { 252 break 253 } 254 if i+1 >= len(b) || b[i+1] == ' ' { 255 return "" // not followed by non-space 256 } 257 b = b[i+1:] 258 } 259 260 return line 261 } 262 263 type op int 264 265 const ( 266 opPara op = iota 267 opHead 268 opPre 269 ) 270 271 type block struct { 272 op op 273 lines []string 274 } 275 276 var nonAlphaNumRx = newLazyRE(`[^a-zA-Z0-9]`) 277 278 func anchorID(line string) string { 279 // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols. 280 return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_") 281 } 282 283 // ToHTML converts comment text to formatted HTML. 284 // The comment was prepared by DocReader, 285 // so it is known not to have leading, trailing blank lines 286 // nor to have trailing spaces at the end of lines. 287 // The comment markers have already been removed. 288 // 289 // Each span of unindented non-blank lines is converted into 290 // a single paragraph. There is one exception to the rule: a span that 291 // consists of a single line, is followed by another paragraph span, 292 // begins with a capital letter, and contains no punctuation 293 // other than parentheses and commas is formatted as a heading. 294 // 295 // A span of indented lines is converted into a <pre> block, 296 // with the common indent prefix removed. 297 // 298 // URLs in the comment text are converted into links; if the URL also appears 299 // in the words map, the link is taken from the map (if the corresponding map 300 // value is the empty string, the URL is not converted into a link). 301 // 302 // Go identifiers that appear in the words map are italicized; if the corresponding 303 // map value is not the empty string, it is considered a URL and the word is converted 304 // into a link. 305 func ToHTML(w io.Writer, text string, words map[string]string) { 306 for _, b := range blocks(text) { 307 switch b.op { 308 case opPara: 309 w.Write(html_p) 310 for _, line := range b.lines { 311 emphasize(w, line, words, true) 312 } 313 w.Write(html_endp) 314 case opHead: 315 w.Write(html_h) 316 id := "" 317 for _, line := range b.lines { 318 if id == "" { 319 id = anchorID(line) 320 w.Write([]byte(id)) 321 w.Write(html_hq) 322 } 323 commentEscape(w, line, true) 324 } 325 if id == "" { 326 w.Write(html_hq) 327 } 328 w.Write(html_endh) 329 case opPre: 330 w.Write(html_pre) 331 for _, line := range b.lines { 332 emphasize(w, line, nil, false) 333 } 334 w.Write(html_endpre) 335 } 336 } 337 } 338 339 func blocks(text string) []block { 340 var ( 341 out []block 342 para []string 343 344 lastWasBlank = false 345 lastWasHeading = false 346 ) 347 348 close := func() { 349 if para != nil { 350 out = append(out, block{opPara, para}) 351 para = nil 352 } 353 } 354 355 lines := strings.SplitAfter(text, "\n") 356 unindent(lines) 357 for i := 0; i < len(lines); { 358 line := lines[i] 359 if isBlank(line) { 360 // close paragraph 361 close() 362 i++ 363 lastWasBlank = true 364 continue 365 } 366 if indentLen(line) > 0 { 367 // close paragraph 368 close() 369 370 // count indented or blank lines 371 j := i + 1 372 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { 373 j++ 374 } 375 // but not trailing blank lines 376 for j > i && isBlank(lines[j-1]) { 377 j-- 378 } 379 pre := lines[i:j] 380 i = j 381 382 unindent(pre) 383 384 // put those lines in a pre block 385 out = append(out, block{opPre, pre}) 386 lastWasHeading = false 387 continue 388 } 389 390 if lastWasBlank && !lastWasHeading && i+2 < len(lines) && 391 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { 392 // current line is non-blank, surrounded by blank lines 393 // and the next non-blank line is not indented: this 394 // might be a heading. 395 if head := heading(line); head != "" { 396 close() 397 out = append(out, block{opHead, []string{head}}) 398 i += 2 399 lastWasHeading = true 400 continue 401 } 402 } 403 404 // open paragraph 405 lastWasBlank = false 406 lastWasHeading = false 407 para = append(para, lines[i]) 408 i++ 409 } 410 close() 411 412 return out 413 } 414 415 // ToText prepares comment text for presentation in textual output. 416 // It wraps paragraphs of text to width or fewer Unicode code points 417 // and then prefixes each line with the indent. In preformatted sections 418 // (such as program text), it prefixes each non-blank line with preIndent. 419 func ToText(w io.Writer, text string, indent, preIndent string, width int) { 420 l := lineWrapper{ 421 out: w, 422 width: width, 423 indent: indent, 424 } 425 for _, b := range blocks(text) { 426 switch b.op { 427 case opPara: 428 // l.write will add leading newline if required 429 for _, line := range b.lines { 430 line = convertQuotes(line) 431 l.write(line) 432 } 433 l.flush() 434 case opHead: 435 w.Write(nl) 436 for _, line := range b.lines { 437 line = convertQuotes(line) 438 l.write(line + "\n") 439 } 440 l.flush() 441 case opPre: 442 w.Write(nl) 443 for _, line := range b.lines { 444 if isBlank(line) { 445 w.Write([]byte("\n")) 446 } else { 447 w.Write([]byte(preIndent)) 448 line = convertQuotes(line) 449 w.Write([]byte(line)) 450 } 451 } 452 } 453 } 454 } 455 456 type lineWrapper struct { 457 out io.Writer 458 printed bool 459 width int 460 indent string 461 n int 462 pendSpace int 463 } 464 465 var nl = []byte("\n") 466 var space = []byte(" ") 467 468 func (l *lineWrapper) write(text string) { 469 if l.n == 0 && l.printed { 470 l.out.Write(nl) // blank line before new paragraph 471 } 472 l.printed = true 473 474 for _, f := range strings.Fields(text) { 475 w := utf8.RuneCountInString(f) 476 // wrap if line is too long 477 if l.n > 0 && l.n+l.pendSpace+w > l.width { 478 l.out.Write(nl) 479 l.n = 0 480 l.pendSpace = 0 481 } 482 if l.n == 0 { 483 l.out.Write([]byte(l.indent)) 484 } 485 l.out.Write(space[:l.pendSpace]) 486 l.out.Write([]byte(f)) 487 l.n += l.pendSpace + w 488 l.pendSpace = 1 489 } 490 } 491 492 func (l *lineWrapper) flush() { 493 if l.n == 0 { 494 return 495 } 496 l.out.Write(nl) 497 l.pendSpace = 0 498 l.n = 0 499 }