github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/go/doc/comment.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Godoc comment extraction and comment -> HTML formatting.
     6  
     7  package doc
     8  
     9  import (
    10  	"io"
    11  	"regexp"
    12  	"strings"
    13  	"text/template" // for HTMLEscape
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  var (
    19  	ldquo = []byte("“")
    20  	rdquo = []byte("”")
    21  )
    22  
    23  // Escape comment text for HTML. If nice is set,
    24  // also turn `` into “ and '' into ”.
    25  func commentEscape(w io.Writer, text string, nice bool) {
    26  	last := 0
    27  	if nice {
    28  		for i := 0; i < len(text)-1; i++ {
    29  			ch := text[i]
    30  			if ch == text[i+1] && (ch == '`' || ch == '\'') {
    31  				template.HTMLEscape(w, []byte(text[last:i]))
    32  				last = i + 2
    33  				switch ch {
    34  				case '`':
    35  					w.Write(ldquo)
    36  				case '\'':
    37  					w.Write(rdquo)
    38  				}
    39  				i++ // loop will add one more
    40  			}
    41  		}
    42  	}
    43  	template.HTMLEscape(w, []byte(text[last:]))
    44  }
    45  
    46  const (
    47  	// Regexp for Go identifiers
    48  	identRx = `[\pL_][\pL_0-9]*`
    49  
    50  	// Regexp for URLs
    51  	protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
    52  	hostPart = `[a-zA-Z0-9_@\-]+`
    53  	filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
    54  	urlRx    = `(` + protocol + `)://` +   // http://
    55  		hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
    56  		filePart + `([:.,]` + filePart + `)*`
    57  )
    58  
    59  var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
    60  
    61  var (
    62  	html_a      = []byte(`<a href="`)
    63  	html_aq     = []byte(`">`)
    64  	html_enda   = []byte("</a>")
    65  	html_i      = []byte("<i>")
    66  	html_endi   = []byte("</i>")
    67  	html_p      = []byte("<p>\n")
    68  	html_endp   = []byte("</p>\n")
    69  	html_pre    = []byte("<pre>")
    70  	html_endpre = []byte("</pre>\n")
    71  	html_h      = []byte(`<h3 id="`)
    72  	html_hq     = []byte(`">`)
    73  	html_endh   = []byte("</h3>\n")
    74  )
    75  
    76  // pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
    77  func pairedParensPrefixLen(s string) int {
    78  	parens := 0
    79  	l := len(s)
    80  	for i, ch := range s {
    81  		switch ch {
    82  		case '(':
    83  			if parens == 0 {
    84  				l = i
    85  			}
    86  			parens++
    87  		case ')':
    88  			parens--
    89  			if parens == 0 {
    90  				l = len(s)
    91  			} else if parens < 0 {
    92  				return i
    93  			}
    94  		}
    95  	}
    96  	return l
    97  }
    98  
    99  // Emphasize and escape a line of text for HTML. URLs are converted into links;
   100  // if the URL also appears in the words map, the link is taken from the map (if
   101  // the corresponding map value is the empty string, the URL is not converted
   102  // into a link). Go identifiers that appear in the words map are italicized; if
   103  // the corresponding map value is not the empty string, it is considered a URL
   104  // and the word is converted into a link. If nice is set, the remaining text's
   105  // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
   106  // and '' into &rdquo;).
   107  func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
   108  	for {
   109  		m := matchRx.FindStringSubmatchIndex(line)
   110  		if m == nil {
   111  			break
   112  		}
   113  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
   114  
   115  		// write text before match
   116  		commentEscape(w, line[0:m[0]], nice)
   117  
   118  		// adjust match if necessary
   119  		match := line[m[0]:m[1]]
   120  		if n := pairedParensPrefixLen(match); n < len(match) {
   121  			// match contains unpaired parentheses (rare);
   122  			// redo matching with shortened line for correct indices
   123  			m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
   124  			match = match[:n]
   125  		}
   126  
   127  		// analyze match
   128  		url := ""
   129  		italics := false
   130  		if words != nil {
   131  			url, italics = words[match]
   132  		}
   133  		if m[2] >= 0 {
   134  			// match against first parenthesized sub-regexp; must be match against urlRx
   135  			if !italics {
   136  				// no alternative URL in words list, use match instead
   137  				url = match
   138  			}
   139  			italics = false // don't italicize URLs
   140  		}
   141  
   142  		// write match
   143  		if len(url) > 0 {
   144  			w.Write(html_a)
   145  			template.HTMLEscape(w, []byte(url))
   146  			w.Write(html_aq)
   147  		}
   148  		if italics {
   149  			w.Write(html_i)
   150  		}
   151  		commentEscape(w, match, nice)
   152  		if italics {
   153  			w.Write(html_endi)
   154  		}
   155  		if len(url) > 0 {
   156  			w.Write(html_enda)
   157  		}
   158  
   159  		// advance
   160  		line = line[m[1]:]
   161  	}
   162  	commentEscape(w, line, nice)
   163  }
   164  
   165  func indentLen(s string) int {
   166  	i := 0
   167  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   168  		i++
   169  	}
   170  	return i
   171  }
   172  
   173  func isBlank(s string) bool {
   174  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   175  }
   176  
   177  func commonPrefix(a, b string) string {
   178  	i := 0
   179  	for i < len(a) && i < len(b) && a[i] == b[i] {
   180  		i++
   181  	}
   182  	return a[0:i]
   183  }
   184  
   185  func unindent(block []string) {
   186  	if len(block) == 0 {
   187  		return
   188  	}
   189  
   190  	// compute maximum common white prefix
   191  	prefix := block[0][0:indentLen(block[0])]
   192  	for _, line := range block {
   193  		if !isBlank(line) {
   194  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
   195  		}
   196  	}
   197  	n := len(prefix)
   198  
   199  	// remove
   200  	for i, line := range block {
   201  		if !isBlank(line) {
   202  			block[i] = line[n:]
   203  		}
   204  	}
   205  }
   206  
   207  // heading returns the trimmed line if it passes as a section heading;
   208  // otherwise it returns the empty string.
   209  func heading(line string) string {
   210  	line = strings.TrimSpace(line)
   211  	if len(line) == 0 {
   212  		return ""
   213  	}
   214  
   215  	// a heading must start with an uppercase letter
   216  	r, _ := utf8.DecodeRuneInString(line)
   217  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   218  		return ""
   219  	}
   220  
   221  	// it must end in a letter or digit:
   222  	r, _ = utf8.DecodeLastRuneInString(line)
   223  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   224  		return ""
   225  	}
   226  
   227  	// exclude lines with illegal characters
   228  	if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
   229  		return ""
   230  	}
   231  
   232  	// allow "'" for possessive "'s" only
   233  	for b := line; ; {
   234  		i := strings.IndexRune(b, '\'')
   235  		if i < 0 {
   236  			break
   237  		}
   238  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   239  			return "" // not followed by "s "
   240  		}
   241  		b = b[i+2:]
   242  	}
   243  
   244  	return line
   245  }
   246  
   247  type op int
   248  
   249  const (
   250  	opPara op = iota
   251  	opHead
   252  	opPre
   253  )
   254  
   255  type block struct {
   256  	op    op
   257  	lines []string
   258  }
   259  
   260  var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
   261  
   262  func anchorID(line string) string {
   263  	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
   264  	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
   265  }
   266  
   267  // ToHTML converts comment text to formatted HTML.
   268  // The comment was prepared by DocReader,
   269  // so it is known not to have leading, trailing blank lines
   270  // nor to have trailing spaces at the end of lines.
   271  // The comment markers have already been removed.
   272  //
   273  // Each span of unindented non-blank lines is converted into
   274  // a single paragraph. There is one exception to the rule: a span that
   275  // consists of a single line, is followed by another paragraph span,
   276  // begins with a capital letter, and contains no punctuation
   277  // is formatted as a heading.
   278  //
   279  // A span of indented lines is converted into a <pre> block,
   280  // with the common indent prefix removed.
   281  //
   282  // URLs in the comment text are converted into links; if the URL also appears
   283  // in the words map, the link is taken from the map (if the corresponding map
   284  // value is the empty string, the URL is not converted into a link).
   285  //
   286  // Go identifiers that appear in the words map are italicized; if the corresponding
   287  // map value is not the empty string, it is considered a URL and the word is converted
   288  // into a link.
   289  func ToHTML(w io.Writer, text string, words map[string]string) {
   290  	for _, b := range blocks(text) {
   291  		switch b.op {
   292  		case opPara:
   293  			w.Write(html_p)
   294  			for _, line := range b.lines {
   295  				emphasize(w, line, words, true)
   296  			}
   297  			w.Write(html_endp)
   298  		case opHead:
   299  			w.Write(html_h)
   300  			id := ""
   301  			for _, line := range b.lines {
   302  				if id == "" {
   303  					id = anchorID(line)
   304  					w.Write([]byte(id))
   305  					w.Write(html_hq)
   306  				}
   307  				commentEscape(w, line, true)
   308  			}
   309  			if id == "" {
   310  				w.Write(html_hq)
   311  			}
   312  			w.Write(html_endh)
   313  		case opPre:
   314  			w.Write(html_pre)
   315  			for _, line := range b.lines {
   316  				emphasize(w, line, nil, false)
   317  			}
   318  			w.Write(html_endpre)
   319  		}
   320  	}
   321  }
   322  
   323  func blocks(text string) []block {
   324  	var (
   325  		out  []block
   326  		para []string
   327  
   328  		lastWasBlank   = false
   329  		lastWasHeading = false
   330  	)
   331  
   332  	close := func() {
   333  		if para != nil {
   334  			out = append(out, block{opPara, para})
   335  			para = nil
   336  		}
   337  	}
   338  
   339  	lines := strings.SplitAfter(text, "\n")
   340  	unindent(lines)
   341  	for i := 0; i < len(lines); {
   342  		line := lines[i]
   343  		if isBlank(line) {
   344  			// close paragraph
   345  			close()
   346  			i++
   347  			lastWasBlank = true
   348  			continue
   349  		}
   350  		if indentLen(line) > 0 {
   351  			// close paragraph
   352  			close()
   353  
   354  			// count indented or blank lines
   355  			j := i + 1
   356  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   357  				j++
   358  			}
   359  			// but not trailing blank lines
   360  			for j > i && isBlank(lines[j-1]) {
   361  				j--
   362  			}
   363  			pre := lines[i:j]
   364  			i = j
   365  
   366  			unindent(pre)
   367  
   368  			// put those lines in a pre block
   369  			out = append(out, block{opPre, pre})
   370  			lastWasHeading = false
   371  			continue
   372  		}
   373  
   374  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   375  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   376  			// current line is non-blank, surrounded by blank lines
   377  			// and the next non-blank line is not indented: this
   378  			// might be a heading.
   379  			if head := heading(line); head != "" {
   380  				close()
   381  				out = append(out, block{opHead, []string{head}})
   382  				i += 2
   383  				lastWasHeading = true
   384  				continue
   385  			}
   386  		}
   387  
   388  		// open paragraph
   389  		lastWasBlank = false
   390  		lastWasHeading = false
   391  		para = append(para, lines[i])
   392  		i++
   393  	}
   394  	close()
   395  
   396  	return out
   397  }
   398  
   399  // ToText prepares comment text for presentation in textual output.
   400  // It wraps paragraphs of text to width or fewer Unicode code points
   401  // and then prefixes each line with the indent.  In preformatted sections
   402  // (such as program text), it prefixes each non-blank line with preIndent.
   403  func ToText(w io.Writer, text string, indent, preIndent string, width int) {
   404  	l := lineWrapper{
   405  		out:    w,
   406  		width:  width,
   407  		indent: indent,
   408  	}
   409  	for _, b := range blocks(text) {
   410  		switch b.op {
   411  		case opPara:
   412  			// l.write will add leading newline if required
   413  			for _, line := range b.lines {
   414  				l.write(line)
   415  			}
   416  			l.flush()
   417  		case opHead:
   418  			w.Write(nl)
   419  			for _, line := range b.lines {
   420  				l.write(line + "\n")
   421  			}
   422  			l.flush()
   423  		case opPre:
   424  			w.Write(nl)
   425  			for _, line := range b.lines {
   426  				if isBlank(line) {
   427  					w.Write([]byte("\n"))
   428  				} else {
   429  					w.Write([]byte(preIndent))
   430  					w.Write([]byte(line))
   431  				}
   432  			}
   433  		}
   434  	}
   435  }
   436  
   437  type lineWrapper struct {
   438  	out       io.Writer
   439  	printed   bool
   440  	width     int
   441  	indent    string
   442  	n         int
   443  	pendSpace int
   444  }
   445  
   446  var nl = []byte("\n")
   447  var space = []byte(" ")
   448  
   449  func (l *lineWrapper) write(text string) {
   450  	if l.n == 0 && l.printed {
   451  		l.out.Write(nl) // blank line before new paragraph
   452  	}
   453  	l.printed = true
   454  
   455  	for _, f := range strings.Fields(text) {
   456  		w := utf8.RuneCountInString(f)
   457  		// wrap if line is too long
   458  		if l.n > 0 && l.n+l.pendSpace+w > l.width {
   459  			l.out.Write(nl)
   460  			l.n = 0
   461  			l.pendSpace = 0
   462  		}
   463  		if l.n == 0 {
   464  			l.out.Write([]byte(l.indent))
   465  		}
   466  		l.out.Write(space[:l.pendSpace])
   467  		l.out.Write([]byte(f))
   468  		l.n += l.pendSpace + w
   469  		l.pendSpace = 1
   470  	}
   471  }
   472  
   473  func (l *lineWrapper) flush() {
   474  	if l.n == 0 {
   475  		return
   476  	}
   477  	l.out.Write(nl)
   478  	l.pendSpace = 0
   479  	l.n = 0
   480  }