golang.org/x/tools/gopls@v0.15.3/internal/golang/comment.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !go1.19
     6  // +build !go1.19
     7  
     8  package golang
     9  
    10  import (
    11  	"bytes"
    12  	"io"
    13  	"regexp"
    14  	"strings"
    15  	"unicode"
    16  	"unicode/utf8"
    17  
    18  	"golang.org/x/tools/gopls/internal/settings"
    19  )
    20  
    21  // CommentToMarkdown converts comment text to formatted markdown.
    22  // The comment was prepared by DocReader,
    23  // so it is known not to have leading, trailing blank lines
    24  // nor to have trailing spaces at the end of lines.
    25  // The comment markers have already been removed.
    26  //
    27  // Each line is converted into a markdown line and empty lines are just converted to
    28  // newlines. Heading are prefixed with `### ` to make it a markdown heading.
    29  //
    30  // A span of indented lines retains a 4 space prefix block, with the common indent
    31  // prefix removed unless empty, in which case it will be converted to a newline.
    32  //
    33  // URLs in the comment text are converted into links.
    34  func CommentToMarkdown(text string, _ *settings.Options) string {
    35  	buf := &bytes.Buffer{}
    36  	commentToMarkdown(buf, text)
    37  	return buf.String()
    38  }
    39  
// Markdown fragments that commentToMarkdown and emphasize write verbatim.
var (
	mdNewline   = []byte("\n")   // blank separator / heading terminator
	mdHeader    = []byte("### ") // prefix written before a heading
	mdIndent    = []byte("    ") // prefix of each non-blank line of an indented block
	mdLinkStart = []byte("[")    // opens the text of a link
	mdLinkDiv   = []byte("](")   // separates the link text from the link target
	mdLinkEnd   = []byte(")")    // closes the link target
)
    48  
    49  func commentToMarkdown(w io.Writer, text string) {
    50  	blocks := blocks(text)
    51  	for i, b := range blocks {
    52  		switch b.op {
    53  		case opPara:
    54  			for _, line := range b.lines {
    55  				emphasize(w, line, true)
    56  			}
    57  		case opHead:
    58  			// The header block can consist of only one line.
    59  			// However, check the number of lines, just in case.
    60  			if len(b.lines) == 0 {
    61  				// Skip this block.
    62  				continue
    63  			}
    64  			header := b.lines[0]
    65  
    66  			w.Write(mdHeader)
    67  			commentEscape(w, header, true)
    68  			// Header doesn't end with \n unlike the lines of other blocks.
    69  			w.Write(mdNewline)
    70  		case opPre:
    71  			for _, line := range b.lines {
    72  				if isBlank(line) {
    73  					w.Write(mdNewline)
    74  					continue
    75  				}
    76  				w.Write(mdIndent)
    77  				w.Write([]byte(line))
    78  			}
    79  		}
    80  
    81  		if i < len(blocks)-1 {
    82  			w.Write(mdNewline)
    83  		}
    84  	}
    85  }
    86  
    87  const (
    88  	ulquo = "“"
    89  	urquo = "”"
    90  )
    91  
    92  var (
    93  	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)
    94  
    95  	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
    96  )
    97  
    98  // commentEscape escapes comment text for markdown. If nice is set,
    99  // also turn double ` and ' into “ and ”.
   100  func commentEscape(w io.Writer, text string, nice bool) {
   101  	if nice {
   102  		text = convertQuotes(text)
   103  	}
   104  	text = escapeRegex(text)
   105  	w.Write([]byte(text))
   106  }
   107  
   108  func convertQuotes(text string) string {
   109  	return unicodeQuoteReplacer.Replace(text)
   110  }
   111  
   112  func escapeRegex(text string) string {
   113  	return markdownEscape.ReplaceAllString(text, `\$1`)
   114  }
   115  
   116  func emphasize(w io.Writer, line string, nice bool) {
   117  	for {
   118  		m := matchRx.FindStringSubmatchIndex(line)
   119  		if m == nil {
   120  			break
   121  		}
   122  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
   123  
   124  		// write text before match
   125  		commentEscape(w, line[0:m[0]], nice)
   126  
   127  		// adjust match for URLs
   128  		match := line[m[0]:m[1]]
   129  		if strings.Contains(match, "://") {
   130  			m0, m1 := m[0], m[1]
   131  			for _, s := range []string{"()", "{}", "[]"} {
   132  				open, close := s[:1], s[1:] // E.g., "(" and ")"
   133  				// require opening parentheses before closing parentheses (#22285)
   134  				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
   135  					m1 = m0 + i
   136  					match = line[m0:m1]
   137  				}
   138  				// require balanced pairs of parentheses (#5043)
   139  				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
   140  					m1 = strings.LastIndexAny(line[:m1], s)
   141  					match = line[m0:m1]
   142  				}
   143  			}
   144  			if m1 != m[1] {
   145  				// redo matching with shortened line for correct indices
   146  				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
   147  			}
   148  		}
   149  
   150  		// Following code has been modified from go/doc since words is always
   151  		// nil. All html formatting has also been transformed into markdown formatting
   152  
   153  		// analyze match
   154  		url := ""
   155  		if m[2] >= 0 {
   156  			url = match
   157  		}
   158  
   159  		// write match
   160  		if len(url) > 0 {
   161  			w.Write(mdLinkStart)
   162  		}
   163  
   164  		commentEscape(w, match, nice)
   165  
   166  		if len(url) > 0 {
   167  			w.Write(mdLinkDiv)
   168  			w.Write([]byte(urlReplacer.Replace(url)))
   169  			w.Write(mdLinkEnd)
   170  		}
   171  
   172  		// advance
   173  		line = line[m[1]:]
   174  	}
   175  	commentEscape(w, line, nice)
   176  }
   177  
   178  // Everything from here on is a copy of go/doc/comment.go
   179  
// Regexp sources from which matchRx is assembled.
const (
	// Regexp for Go identifiers
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check later for balance - see #5043, #22285
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid URLs ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	// Complete URL pattern: protocol, "://", host, then the optional path.
	urlRx = protoPart + `://` + hostPart + pathPart
)

// matchRx matches either a URL (first parenthesized group) or a Go
// identifier; emphasize tells the two apart through submatch index 2.
// urlReplacer backslash-escapes parentheses and is applied by emphasize to
// the target of a markdown link.
var (
	matchRx     = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
)
   204  
   205  func indentLen(s string) int {
   206  	i := 0
   207  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   208  		i++
   209  	}
   210  	return i
   211  }
   212  
   213  func isBlank(s string) bool {
   214  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   215  }
   216  
   217  func commonPrefix(a, b string) string {
   218  	i := 0
   219  	for i < len(a) && i < len(b) && a[i] == b[i] {
   220  		i++
   221  	}
   222  	return a[0:i]
   223  }
   224  
   225  func unindent(block []string) {
   226  	if len(block) == 0 {
   227  		return
   228  	}
   229  
   230  	// compute maximum common white prefix
   231  	prefix := block[0][0:indentLen(block[0])]
   232  	for _, line := range block {
   233  		if !isBlank(line) {
   234  			prefix = commonPrefix(prefix, line)
   235  		}
   236  	}
   237  	n := len(prefix)
   238  
   239  	// remove
   240  	for i, line := range block {
   241  		if !isBlank(line) {
   242  			block[i] = line[n:]
   243  		}
   244  	}
   245  }
   246  
   247  // heading returns the trimmed line if it passes as a section heading;
   248  // otherwise it returns the empty string.
   249  func heading(line string) string {
   250  	line = strings.TrimSpace(line)
   251  	if len(line) == 0 {
   252  		return ""
   253  	}
   254  
   255  	// a heading must start with an uppercase letter
   256  	r, _ := utf8.DecodeRuneInString(line)
   257  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   258  		return ""
   259  	}
   260  
   261  	// it must end in a letter or digit:
   262  	r, _ = utf8.DecodeLastRuneInString(line)
   263  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   264  		return ""
   265  	}
   266  
   267  	// exclude lines with illegal characters. we allow "(),"
   268  	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
   269  		return ""
   270  	}
   271  
   272  	// allow "'" for possessive "'s" only
   273  	for b := line; ; {
   274  		i := strings.IndexRune(b, '\'')
   275  		if i < 0 {
   276  			break
   277  		}
   278  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   279  			return "" // not followed by "s "
   280  		}
   281  		b = b[i+2:]
   282  	}
   283  
   284  	// allow "." when followed by non-space
   285  	for b := line; ; {
   286  		i := strings.IndexRune(b, '.')
   287  		if i < 0 {
   288  			break
   289  		}
   290  		if i+1 >= len(b) || b[i+1] == ' ' {
   291  			return "" // not followed by non-space
   292  		}
   293  		b = b[i+1:]
   294  	}
   295  
   296  	return line
   297  }
   298  
// op identifies the kind of a block produced by blocks.
type op int

const (
	opPara op = iota // paragraph of ordinary text lines
	opHead           // section heading
	opPre            // span of indented (preformatted) lines
)

// block is a run of comment lines that share one op.
type block struct {
	op    op
	lines []string // for opHead, blocks stores a single trimmed heading line
}
   311  
   312  func blocks(text string) []block {
   313  	var (
   314  		out  []block
   315  		para []string
   316  
   317  		lastWasBlank   = false
   318  		lastWasHeading = false
   319  	)
   320  
   321  	close := func() {
   322  		if para != nil {
   323  			out = append(out, block{opPara, para})
   324  			para = nil
   325  		}
   326  	}
   327  
   328  	lines := strings.SplitAfter(text, "\n")
   329  	unindent(lines)
   330  	for i := 0; i < len(lines); {
   331  		line := lines[i]
   332  		if isBlank(line) {
   333  			// close paragraph
   334  			close()
   335  			i++
   336  			lastWasBlank = true
   337  			continue
   338  		}
   339  		if indentLen(line) > 0 {
   340  			// close paragraph
   341  			close()
   342  
   343  			// count indented or blank lines
   344  			j := i + 1
   345  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   346  				j++
   347  			}
   348  			// but not trailing blank lines
   349  			for j > i && isBlank(lines[j-1]) {
   350  				j--
   351  			}
   352  			pre := lines[i:j]
   353  			i = j
   354  
   355  			unindent(pre)
   356  
   357  			// put those lines in a pre block
   358  			out = append(out, block{opPre, pre})
   359  			lastWasHeading = false
   360  			continue
   361  		}
   362  
   363  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   364  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   365  			// current line is non-blank, surrounded by blank lines
   366  			// and the next non-blank line is not indented: this
   367  			// might be a heading.
   368  			if head := heading(line); head != "" {
   369  				close()
   370  				out = append(out, block{opHead, []string{head}})
   371  				i += 2
   372  				lastWasHeading = true
   373  				continue
   374  			}
   375  		}
   376  
   377  		// open paragraph
   378  		lastWasBlank = false
   379  		lastWasHeading = false
   380  		para = append(para, lines[i])
   381  		i++
   382  	}
   383  	close()
   384  
   385  	return out
   386  }