github.com/jhump/golang-x-tools@v0.0.0-20220218190644-4958d6d39439/internal/lsp/source/comment.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package source
     6  
     7  import (
     8  	"bytes"
     9  	"io"
    10  	"regexp"
    11  	"strings"
    12  	"unicode"
    13  	"unicode/utf8"
    14  )
    15  
    16  // CommentToMarkdown converts comment text to formatted markdown.
    17  // The comment was prepared by DocReader,
    18  // so it is known not to have leading, trailing blank lines
    19  // nor to have trailing spaces at the end of lines.
    20  // The comment markers have already been removed.
    21  //
    22  // Each line is converted into a markdown line and empty lines are just converted to
    23  // newlines. Heading are prefixed with `### ` to make it a markdown heading.
    24  //
    25  // A span of indented lines retains a 4 space prefix block, with the common indent
    26  // prefix removed unless empty, in which case it will be converted to a newline.
    27  //
    28  // URLs in the comment text are converted into links.
    29  func CommentToMarkdown(text string) string {
    30  	buf := &bytes.Buffer{}
    31  	commentToMarkdown(buf, text)
    32  	return buf.String()
    33  }
    34  
    35  var (
    36  	mdNewline   = []byte("\n")
    37  	mdHeader    = []byte("### ")
    38  	mdIndent    = []byte("    ")
    39  	mdLinkStart = []byte("[")
    40  	mdLinkDiv   = []byte("](")
    41  	mdLinkEnd   = []byte(")")
    42  )
    43  
    44  func commentToMarkdown(w io.Writer, text string) {
    45  	blocks := blocks(text)
    46  	for i, b := range blocks {
    47  		switch b.op {
    48  		case opPara:
    49  			for _, line := range b.lines {
    50  				emphasize(w, line, true)
    51  			}
    52  		case opHead:
    53  			// The header block can consist of only one line.
    54  			// However, check the number of lines, just in case.
    55  			if len(b.lines) == 0 {
    56  				// Skip this block.
    57  				continue
    58  			}
    59  			header := b.lines[0]
    60  
    61  			w.Write(mdHeader)
    62  			commentEscape(w, header, true)
    63  			// Header doesn't end with \n unlike the lines of other blocks.
    64  			w.Write(mdNewline)
    65  		case opPre:
    66  			for _, line := range b.lines {
    67  				if isBlank(line) {
    68  					w.Write(mdNewline)
    69  					continue
    70  				}
    71  				w.Write(mdIndent)
    72  				w.Write([]byte(line))
    73  			}
    74  		}
    75  
    76  		if i < len(blocks)-1 {
    77  			w.Write(mdNewline)
    78  		}
    79  	}
    80  }
    81  
    82  const (
    83  	ulquo = "“"
    84  	urquo = "”"
    85  )
    86  
    87  var (
    88  	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)
    89  
    90  	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
    91  )
    92  
    93  // commentEscape escapes comment text for markdown. If nice is set,
    94  // also turn `` into “; and '' into ”;.
    95  func commentEscape(w io.Writer, text string, nice bool) {
    96  	if nice {
    97  		text = convertQuotes(text)
    98  	}
    99  	text = escapeRegex(text)
   100  	w.Write([]byte(text))
   101  }
   102  
   103  func convertQuotes(text string) string {
   104  	return unicodeQuoteReplacer.Replace(text)
   105  }
   106  
   107  func escapeRegex(text string) string {
   108  	return markdownEscape.ReplaceAllString(text, `\$1`)
   109  }
   110  
   111  func emphasize(w io.Writer, line string, nice bool) {
   112  	for {
   113  		m := matchRx.FindStringSubmatchIndex(line)
   114  		if m == nil {
   115  			break
   116  		}
   117  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
   118  
   119  		// write text before match
   120  		commentEscape(w, line[0:m[0]], nice)
   121  
   122  		// adjust match for URLs
   123  		match := line[m[0]:m[1]]
   124  		if strings.Contains(match, "://") {
   125  			m0, m1 := m[0], m[1]
   126  			for _, s := range []string{"()", "{}", "[]"} {
   127  				open, close := s[:1], s[1:] // E.g., "(" and ")"
   128  				// require opening parentheses before closing parentheses (#22285)
   129  				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
   130  					m1 = m0 + i
   131  					match = line[m0:m1]
   132  				}
   133  				// require balanced pairs of parentheses (#5043)
   134  				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
   135  					m1 = strings.LastIndexAny(line[:m1], s)
   136  					match = line[m0:m1]
   137  				}
   138  			}
   139  			if m1 != m[1] {
   140  				// redo matching with shortened line for correct indices
   141  				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
   142  			}
   143  		}
   144  
   145  		// Following code has been modified from go/doc since words is always
   146  		// nil. All html formatting has also been transformed into markdown formatting
   147  
   148  		// analyze match
   149  		url := ""
   150  		if m[2] >= 0 {
   151  			url = match
   152  		}
   153  
   154  		// write match
   155  		if len(url) > 0 {
   156  			w.Write(mdLinkStart)
   157  		}
   158  
   159  		commentEscape(w, match, nice)
   160  
   161  		if len(url) > 0 {
   162  			w.Write(mdLinkDiv)
   163  			w.Write([]byte(urlReplacer.Replace(url)))
   164  			w.Write(mdLinkEnd)
   165  		}
   166  
   167  		// advance
   168  		line = line[m[1]:]
   169  	}
   170  	commentEscape(w, line, nice)
   171  }
   172  
   173  // Everything from here on is a copy of go/doc/comment.go
   174  
   175  const (
   176  	// Regexp for Go identifiers
   177  	identRx = `[\pL_][\pL_0-9]*`
   178  
   179  	// Regexp for URLs
   180  	// Match parens, and check later for balance - see #5043, #22285
   181  	// Match .,:;?! within path, but not at end - see #18139, #16565
   182  	// This excludes some rare yet valid urls ending in common punctuation
   183  	// in order to allow sentences ending in URLs.
   184  
   185  	// protocol (required) e.g. http
   186  	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
   187  	// host (required) e.g. www.example.com or [::1]:8080
   188  	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
   189  	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
   190  	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
   191  
   192  	urlRx = protoPart + `://` + hostPart + pathPart
   193  )
   194  
   195  var (
   196  	matchRx     = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
   197  	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
   198  )
   199  
   200  func indentLen(s string) int {
   201  	i := 0
   202  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   203  		i++
   204  	}
   205  	return i
   206  }
   207  
   208  func isBlank(s string) bool {
   209  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   210  }
   211  
   212  func commonPrefix(a, b string) string {
   213  	i := 0
   214  	for i < len(a) && i < len(b) && a[i] == b[i] {
   215  		i++
   216  	}
   217  	return a[0:i]
   218  }
   219  
   220  func unindent(block []string) {
   221  	if len(block) == 0 {
   222  		return
   223  	}
   224  
   225  	// compute maximum common white prefix
   226  	prefix := block[0][0:indentLen(block[0])]
   227  	for _, line := range block {
   228  		if !isBlank(line) {
   229  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
   230  		}
   231  	}
   232  	n := len(prefix)
   233  
   234  	// remove
   235  	for i, line := range block {
   236  		if !isBlank(line) {
   237  			block[i] = line[n:]
   238  		}
   239  	}
   240  }
   241  
   242  // heading returns the trimmed line if it passes as a section heading;
   243  // otherwise it returns the empty string.
   244  func heading(line string) string {
   245  	line = strings.TrimSpace(line)
   246  	if len(line) == 0 {
   247  		return ""
   248  	}
   249  
   250  	// a heading must start with an uppercase letter
   251  	r, _ := utf8.DecodeRuneInString(line)
   252  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   253  		return ""
   254  	}
   255  
   256  	// it must end in a letter or digit:
   257  	r, _ = utf8.DecodeLastRuneInString(line)
   258  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   259  		return ""
   260  	}
   261  
   262  	// exclude lines with illegal characters. we allow "(),"
   263  	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
   264  		return ""
   265  	}
   266  
   267  	// allow "'" for possessive "'s" only
   268  	for b := line; ; {
   269  		i := strings.IndexRune(b, '\'')
   270  		if i < 0 {
   271  			break
   272  		}
   273  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   274  			return "" // not followed by "s "
   275  		}
   276  		b = b[i+2:]
   277  	}
   278  
   279  	// allow "." when followed by non-space
   280  	for b := line; ; {
   281  		i := strings.IndexRune(b, '.')
   282  		if i < 0 {
   283  			break
   284  		}
   285  		if i+1 >= len(b) || b[i+1] == ' ' {
   286  			return "" // not followed by non-space
   287  		}
   288  		b = b[i+1:]
   289  	}
   290  
   291  	return line
   292  }
   293  
   294  type op int
   295  
   296  const (
   297  	opPara op = iota
   298  	opHead
   299  	opPre
   300  )
   301  
   302  type block struct {
   303  	op    op
   304  	lines []string
   305  }
   306  
   307  func blocks(text string) []block {
   308  	var (
   309  		out  []block
   310  		para []string
   311  
   312  		lastWasBlank   = false
   313  		lastWasHeading = false
   314  	)
   315  
   316  	close := func() {
   317  		if para != nil {
   318  			out = append(out, block{opPara, para})
   319  			para = nil
   320  		}
   321  	}
   322  
   323  	lines := strings.SplitAfter(text, "\n")
   324  	unindent(lines)
   325  	for i := 0; i < len(lines); {
   326  		line := lines[i]
   327  		if isBlank(line) {
   328  			// close paragraph
   329  			close()
   330  			i++
   331  			lastWasBlank = true
   332  			continue
   333  		}
   334  		if indentLen(line) > 0 {
   335  			// close paragraph
   336  			close()
   337  
   338  			// count indented or blank lines
   339  			j := i + 1
   340  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   341  				j++
   342  			}
   343  			// but not trailing blank lines
   344  			for j > i && isBlank(lines[j-1]) {
   345  				j--
   346  			}
   347  			pre := lines[i:j]
   348  			i = j
   349  
   350  			unindent(pre)
   351  
   352  			// put those lines in a pre block
   353  			out = append(out, block{opPre, pre})
   354  			lastWasHeading = false
   355  			continue
   356  		}
   357  
   358  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   359  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   360  			// current line is non-blank, surrounded by blank lines
   361  			// and the next non-blank line is not indented: this
   362  			// might be a heading.
   363  			if head := heading(line); head != "" {
   364  				close()
   365  				out = append(out, block{opHead, []string{head}})
   366  				i += 2
   367  				lastWasHeading = true
   368  				continue
   369  			}
   370  		}
   371  
   372  		// open paragraph
   373  		lastWasBlank = false
   374  		lastWasHeading = false
   375  		para = append(para, lines[i])
   376  		i++
   377  	}
   378  	close()
   379  
   380  	return out
   381  }