github.com/rohankumardubey/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/go/doc/comment.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Godoc comment extraction and comment -> HTML formatting.
     6  
     7  package doc
     8  
     9  import (
    10  	"io"
    11  	"regexp"
    12  	"strings"
    13  	"text/template" // for HTMLEscape
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
    18  var (
    19  	ldquo = []byte("“")
    20  	rdquo = []byte("”")
    21  )
    22  
    23  // Escape comment text for HTML. If nice is set,
    24  // also turn `` into “ and '' into ”.
    25  func commentEscape(w io.Writer, text string, nice bool) {
    26  	last := 0
    27  	if nice {
    28  		for i := 0; i < len(text)-1; i++ {
    29  			ch := text[i]
    30  			if ch == text[i+1] && (ch == '`' || ch == '\'') {
    31  				template.HTMLEscape(w, []byte(text[last:i]))
    32  				last = i + 2
    33  				switch ch {
    34  				case '`':
    35  					w.Write(ldquo)
    36  				case '\'':
    37  					w.Write(rdquo)
    38  				}
    39  				i++ // loop will add one more
    40  			}
    41  		}
    42  	}
    43  	template.HTMLEscape(w, []byte(text[last:]))
    44  }
    45  
    46  const (
    47  	// Regexp for Go identifiers
    48  	identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
    49  
    50  	// Regexp for URLs
    51  	protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
    52  	hostPart = `[a-zA-Z0-9_@\-]+`
    53  	filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
    54  	urlRx    = protocol + `//` + // http://
    55  		hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
    56  		filePart + `([:.,]` + filePart + `)*`
    57  )
    58  
    59  var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
    60  
// HTML fragments written by emphasize and ToHTML. Each opening fragment
// is paired with the closing fragment that follows it.
var (
	html_a      = []byte(`<a href="`) // start of a link; the escaped URL follows
	html_aq     = []byte(`">`)        // closes the href attribute and the <a> tag
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte(`<h3 id="`) // start of a heading; the anchor ID follows
	html_hq     = []byte(`">`)       // closes the id attribute and the <h3> tag
	html_endh   = []byte("</h3>\n")
)
    75  
    76  // Emphasize and escape a line of text for HTML. URLs are converted into links;
    77  // if the URL also appears in the words map, the link is taken from the map (if
    78  // the corresponding map value is the empty string, the URL is not converted
    79  // into a link). Go identifiers that appear in the words map are italicized; if
    80  // the corresponding map value is not the empty string, it is considered a URL
    81  // and the word is converted into a link. If nice is set, the remaining text's
    82  // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
    83  // and '' into &rdquo;).
    84  func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
    85  	for {
    86  		m := matchRx.FindStringSubmatchIndex(line)
    87  		if m == nil {
    88  			break
    89  		}
    90  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
    91  
    92  		// write text before match
    93  		commentEscape(w, line[0:m[0]], nice)
    94  
    95  		// analyze match
    96  		match := line[m[0]:m[1]]
    97  		url := ""
    98  		italics := false
    99  		if words != nil {
   100  			url, italics = words[string(match)]
   101  		}
   102  		if m[2] >= 0 {
   103  			// match against first parenthesized sub-regexp; must be match against urlRx
   104  			if !italics {
   105  				// no alternative URL in words list, use match instead
   106  				url = string(match)
   107  			}
   108  			italics = false // don't italicize URLs
   109  		}
   110  
   111  		// write match
   112  		if len(url) > 0 {
   113  			w.Write(html_a)
   114  			template.HTMLEscape(w, []byte(url))
   115  			w.Write(html_aq)
   116  		}
   117  		if italics {
   118  			w.Write(html_i)
   119  		}
   120  		commentEscape(w, match, nice)
   121  		if italics {
   122  			w.Write(html_endi)
   123  		}
   124  		if len(url) > 0 {
   125  			w.Write(html_enda)
   126  		}
   127  
   128  		// advance
   129  		line = line[m[1]:]
   130  	}
   131  	commentEscape(w, line, nice)
   132  }
   133  
   134  func indentLen(s string) int {
   135  	i := 0
   136  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   137  		i++
   138  	}
   139  	return i
   140  }
   141  
   142  func isBlank(s string) bool {
   143  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   144  }
   145  
   146  func commonPrefix(a, b string) string {
   147  	i := 0
   148  	for i < len(a) && i < len(b) && a[i] == b[i] {
   149  		i++
   150  	}
   151  	return a[0:i]
   152  }
   153  
   154  func unindent(block []string) {
   155  	if len(block) == 0 {
   156  		return
   157  	}
   158  
   159  	// compute maximum common white prefix
   160  	prefix := block[0][0:indentLen(block[0])]
   161  	for _, line := range block {
   162  		if !isBlank(line) {
   163  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
   164  		}
   165  	}
   166  	n := len(prefix)
   167  
   168  	// remove
   169  	for i, line := range block {
   170  		if !isBlank(line) {
   171  			block[i] = line[n:]
   172  		}
   173  	}
   174  }
   175  
   176  // heading returns the trimmed line if it passes as a section heading;
   177  // otherwise it returns the empty string.
   178  func heading(line string) string {
   179  	line = strings.TrimSpace(line)
   180  	if len(line) == 0 {
   181  		return ""
   182  	}
   183  
   184  	// a heading must start with an uppercase letter
   185  	r, _ := utf8.DecodeRuneInString(line)
   186  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   187  		return ""
   188  	}
   189  
   190  	// it must end in a letter or digit:
   191  	r, _ = utf8.DecodeLastRuneInString(line)
   192  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   193  		return ""
   194  	}
   195  
   196  	// exclude lines with illegal characters
   197  	if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
   198  		return ""
   199  	}
   200  
   201  	// allow "'" for possessive "'s" only
   202  	for b := line; ; {
   203  		i := strings.IndexRune(b, '\'')
   204  		if i < 0 {
   205  			break
   206  		}
   207  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   208  			return "" // not followed by "s "
   209  		}
   210  		b = b[i+2:]
   211  	}
   212  
   213  	return line
   214  }
   215  
// An op identifies the kind of a block of comment text.
type op int

const (
	opPara op = iota // a paragraph of running text
	opHead           // a section heading
	opPre            // a preformatted (indented) span of lines
)

// A block is a run of comment lines that is rendered as one unit,
// as selected by op.
type block struct {
	op    op
	lines []string
}
   228  
   229  var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
   230  
   231  func anchorID(line string) string {
   232  	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
   233  	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
   234  }
   235  
   236  // ToHTML converts comment text to formatted HTML.
   237  // The comment was prepared by DocReader,
   238  // so it is known not to have leading, trailing blank lines
   239  // nor to have trailing spaces at the end of lines.
   240  // The comment markers have already been removed.
   241  //
   242  // Each span of unindented non-blank lines is converted into
   243  // a single paragraph. There is one exception to the rule: a span that
   244  // consists of a single line, is followed by another paragraph span,
   245  // begins with a capital letter, and contains no punctuation
   246  // is formatted as a heading.
   247  //
   248  // A span of indented lines is converted into a <pre> block,
   249  // with the common indent prefix removed.
   250  //
   251  // URLs in the comment text are converted into links; if the URL also appears
   252  // in the words map, the link is taken from the map (if the corresponding map
   253  // value is the empty string, the URL is not converted into a link).
   254  //
   255  // Go identifiers that appear in the words map are italicized; if the corresponding
   256  // map value is not the empty string, it is considered a URL and the word is converted
   257  // into a link.
   258  func ToHTML(w io.Writer, text string, words map[string]string) {
   259  	for _, b := range blocks(text) {
   260  		switch b.op {
   261  		case opPara:
   262  			w.Write(html_p)
   263  			for _, line := range b.lines {
   264  				emphasize(w, line, words, true)
   265  			}
   266  			w.Write(html_endp)
   267  		case opHead:
   268  			w.Write(html_h)
   269  			id := ""
   270  			for _, line := range b.lines {
   271  				if id == "" {
   272  					id = anchorID(line)
   273  					w.Write([]byte(id))
   274  					w.Write(html_hq)
   275  				}
   276  				commentEscape(w, line, true)
   277  			}
   278  			if id == "" {
   279  				w.Write(html_hq)
   280  			}
   281  			w.Write(html_endh)
   282  		case opPre:
   283  			w.Write(html_pre)
   284  			for _, line := range b.lines {
   285  				emphasize(w, line, nil, false)
   286  			}
   287  			w.Write(html_endpre)
   288  		}
   289  	}
   290  }
   291  
   292  func blocks(text string) []block {
   293  	var (
   294  		out  []block
   295  		para []string
   296  
   297  		lastWasBlank   = false
   298  		lastWasHeading = false
   299  	)
   300  
   301  	close := func() {
   302  		if para != nil {
   303  			out = append(out, block{opPara, para})
   304  			para = nil
   305  		}
   306  	}
   307  
   308  	lines := strings.SplitAfter(text, "\n")
   309  	unindent(lines)
   310  	for i := 0; i < len(lines); {
   311  		line := lines[i]
   312  		if isBlank(line) {
   313  			// close paragraph
   314  			close()
   315  			i++
   316  			lastWasBlank = true
   317  			continue
   318  		}
   319  		if indentLen(line) > 0 {
   320  			// close paragraph
   321  			close()
   322  
   323  			// count indented or blank lines
   324  			j := i + 1
   325  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   326  				j++
   327  			}
   328  			// but not trailing blank lines
   329  			for j > i && isBlank(lines[j-1]) {
   330  				j--
   331  			}
   332  			pre := lines[i:j]
   333  			i = j
   334  
   335  			unindent(pre)
   336  
   337  			// put those lines in a pre block
   338  			out = append(out, block{opPre, pre})
   339  			lastWasHeading = false
   340  			continue
   341  		}
   342  
   343  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   344  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   345  			// current line is non-blank, surrounded by blank lines
   346  			// and the next non-blank line is not indented: this
   347  			// might be a heading.
   348  			if head := heading(line); head != "" {
   349  				close()
   350  				out = append(out, block{opHead, []string{head}})
   351  				i += 2
   352  				lastWasHeading = true
   353  				continue
   354  			}
   355  		}
   356  
   357  		// open paragraph
   358  		lastWasBlank = false
   359  		lastWasHeading = false
   360  		para = append(para, lines[i])
   361  		i++
   362  	}
   363  	close()
   364  
   365  	return out
   366  }
   367  
   368  // ToText prepares comment text for presentation in textual output.
   369  // It wraps paragraphs of text to width or fewer Unicode code points
   370  // and then prefixes each line with the indent.  In preformatted sections
   371  // (such as program text), it prefixes each non-blank line with preIndent.
   372  func ToText(w io.Writer, text string, indent, preIndent string, width int) {
   373  	l := lineWrapper{
   374  		out:    w,
   375  		width:  width,
   376  		indent: indent,
   377  	}
   378  	for _, b := range blocks(text) {
   379  		switch b.op {
   380  		case opPara:
   381  			// l.write will add leading newline if required
   382  			for _, line := range b.lines {
   383  				l.write(line)
   384  			}
   385  			l.flush()
   386  		case opHead:
   387  			w.Write(nl)
   388  			for _, line := range b.lines {
   389  				l.write(line + "\n")
   390  			}
   391  			l.flush()
   392  		case opPre:
   393  			w.Write(nl)
   394  			for _, line := range b.lines {
   395  				if !isBlank(line) {
   396  					w.Write([]byte(preIndent))
   397  					w.Write([]byte(line))
   398  				}
   399  			}
   400  		}
   401  	}
   402  }
   403  
   404  type lineWrapper struct {
   405  	out       io.Writer
   406  	printed   bool
   407  	width     int
   408  	indent    string
   409  	n         int
   410  	pendSpace int
   411  }
   412  
   413  var nl = []byte("\n")
   414  var space = []byte(" ")
   415  
   416  func (l *lineWrapper) write(text string) {
   417  	if l.n == 0 && l.printed {
   418  		l.out.Write(nl) // blank line before new paragraph
   419  	}
   420  	l.printed = true
   421  
   422  	for _, f := range strings.Fields(text) {
   423  		w := utf8.RuneCountInString(f)
   424  		// wrap if line is too long
   425  		if l.n > 0 && l.n+l.pendSpace+w > l.width {
   426  			l.out.Write(nl)
   427  			l.n = 0
   428  			l.pendSpace = 0
   429  		}
   430  		if l.n == 0 {
   431  			l.out.Write([]byte(l.indent))
   432  		}
   433  		l.out.Write(space[:l.pendSpace])
   434  		l.out.Write([]byte(f))
   435  		l.n += l.pendSpace + w
   436  		l.pendSpace = 1
   437  	}
   438  }
   439  
   440  func (l *lineWrapper) flush() {
   441  	if l.n == 0 {
   442  		return
   443  	}
   444  	l.out.Write(nl)
   445  	l.pendSpace = 0
   446  	l.n = 0
   447  }