github.com/april1989/origin-go-tools@v0.0.32/internal/lsp/source/comment.go (about)

     1  package source
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"regexp"
     7  	"strings"
     8  	"unicode"
     9  	"unicode/utf8"
    10  )
    11  
    12  // CommentToMarkdown converts comment text to formatted markdown.
    13  // The comment was prepared by DocReader,
    14  // so it is known not to have leading, trailing blank lines
    15  // nor to have trailing spaces at the end of lines.
    16  // The comment markers have already been removed.
    17  //
    18  // Each line is converted into a markdown line and empty lines are just converted to
    19  // newlines. Heading are prefixed with `### ` to make it a markdown heading.
    20  //
    21  // A span of indented lines retains a 4 space prefix block, with the common indent
    22  // prefix removed unless empty, in which case it will be converted to a newline.
    23  //
    24  // URLs in the comment text are converted into links.
    25  func CommentToMarkdown(text string) string {
    26  	buf := &bytes.Buffer{}
    27  	commentToMarkdown(buf, text)
    28  	return buf.String()
    29  }
    30  
    31  var (
    32  	mdNewline   = []byte("\n")
    33  	mdHeader    = []byte("### ")
    34  	mdIndent    = []byte("    ")
    35  	mdLinkStart = []byte("[")
    36  	mdLinkDiv   = []byte("](")
    37  	mdLinkEnd   = []byte(")")
    38  )
    39  
    40  func commentToMarkdown(w io.Writer, text string) {
    41  	isFirstLine := true
    42  	for _, b := range blocks(text) {
    43  		switch b.op {
    44  		case opPara:
    45  			if !isFirstLine {
    46  				w.Write(mdNewline)
    47  			}
    48  
    49  			for _, line := range b.lines {
    50  				emphasize(w, line, true)
    51  			}
    52  		case opHead:
    53  			if !isFirstLine {
    54  				w.Write(mdNewline)
    55  			}
    56  			w.Write(mdNewline)
    57  
    58  			for _, line := range b.lines {
    59  				w.Write(mdHeader)
    60  				commentEscape(w, line, true)
    61  				w.Write(mdNewline)
    62  			}
    63  		case opPre:
    64  			if !isFirstLine {
    65  				w.Write(mdNewline)
    66  			}
    67  			w.Write(mdNewline)
    68  
    69  			for _, line := range b.lines {
    70  				if isBlank(line) {
    71  					w.Write(mdNewline)
    72  				} else {
    73  					w.Write(mdIndent)
    74  					w.Write([]byte(line))
    75  					w.Write(mdNewline)
    76  				}
    77  			}
    78  		}
    79  		isFirstLine = false
    80  	}
    81  }
    82  
    83  const (
    84  	ulquo = "“"
    85  	urquo = "”"
    86  )
    87  
    88  var (
    89  	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)
    90  
    91  	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
    92  )
    93  
    94  // commentEscape escapes comment text for markdown. If nice is set,
    95  // also turn `` into “; and '' into ”;.
    96  func commentEscape(w io.Writer, text string, nice bool) {
    97  	if nice {
    98  		text = convertQuotes(text)
    99  	}
   100  	text = escapeRegex(text)
   101  	w.Write([]byte(text))
   102  }
   103  
   104  func convertQuotes(text string) string {
   105  	return unicodeQuoteReplacer.Replace(text)
   106  }
   107  
   108  func escapeRegex(text string) string {
   109  	return markdownEscape.ReplaceAllString(text, `\$1`)
   110  }
   111  
   112  func emphasize(w io.Writer, line string, nice bool) {
   113  	for {
   114  		m := matchRx.FindStringSubmatchIndex(line)
   115  		if m == nil {
   116  			break
   117  		}
   118  		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
   119  
   120  		// write text before match
   121  		commentEscape(w, line[0:m[0]], nice)
   122  
   123  		// adjust match for URLs
   124  		match := line[m[0]:m[1]]
   125  		if strings.Contains(match, "://") {
   126  			m0, m1 := m[0], m[1]
   127  			for _, s := range []string{"()", "{}", "[]"} {
   128  				open, close := s[:1], s[1:] // E.g., "(" and ")"
   129  				// require opening parentheses before closing parentheses (#22285)
   130  				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
   131  					m1 = m0 + i
   132  					match = line[m0:m1]
   133  				}
   134  				// require balanced pairs of parentheses (#5043)
   135  				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
   136  					m1 = strings.LastIndexAny(line[:m1], s)
   137  					match = line[m0:m1]
   138  				}
   139  			}
   140  			if m1 != m[1] {
   141  				// redo matching with shortened line for correct indices
   142  				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
   143  			}
   144  		}
   145  
   146  		// Following code has been modified from go/doc since words is always
   147  		// nil. All html formatting has also been transformed into markdown formatting
   148  
   149  		// analyze match
   150  		url := ""
   151  		if m[2] >= 0 {
   152  			url = match
   153  		}
   154  
   155  		// write match
   156  		if len(url) > 0 {
   157  			w.Write(mdLinkStart)
   158  		}
   159  
   160  		commentEscape(w, match, nice)
   161  
   162  		if len(url) > 0 {
   163  			w.Write(mdLinkDiv)
   164  			w.Write([]byte(urlReplacer.Replace(url)))
   165  			w.Write(mdLinkEnd)
   166  		}
   167  
   168  		// advance
   169  		line = line[m[1]:]
   170  	}
   171  	commentEscape(w, line, nice)
   172  }
   173  
   174  // Everything from here on is a copy of go/doc/comment.go
   175  
   176  const (
   177  	// Regexp for Go identifiers
   178  	identRx = `[\pL_][\pL_0-9]*`
   179  
   180  	// Regexp for URLs
   181  	// Match parens, and check later for balance - see #5043, #22285
   182  	// Match .,:;?! within path, but not at end - see #18139, #16565
   183  	// This excludes some rare yet valid urls ending in common punctuation
   184  	// in order to allow sentences ending in URLs.
   185  
   186  	// protocol (required) e.g. http
   187  	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
   188  	// host (required) e.g. www.example.com or [::1]:8080
   189  	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
   190  	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
   191  	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
   192  
   193  	urlRx = protoPart + `://` + hostPart + pathPart
   194  )
   195  
   196  var (
   197  	matchRx     = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
   198  	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
   199  )
   200  
   201  func indentLen(s string) int {
   202  	i := 0
   203  	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   204  		i++
   205  	}
   206  	return i
   207  }
   208  
   209  func isBlank(s string) bool {
   210  	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   211  }
   212  
   213  func commonPrefix(a, b string) string {
   214  	i := 0
   215  	for i < len(a) && i < len(b) && a[i] == b[i] {
   216  		i++
   217  	}
   218  	return a[0:i]
   219  }
   220  
   221  func unindent(block []string) {
   222  	if len(block) == 0 {
   223  		return
   224  	}
   225  
   226  	// compute maximum common white prefix
   227  	prefix := block[0][0:indentLen(block[0])]
   228  	for _, line := range block {
   229  		if !isBlank(line) {
   230  			prefix = commonPrefix(prefix, line[0:indentLen(line)])
   231  		}
   232  	}
   233  	n := len(prefix)
   234  
   235  	// remove
   236  	for i, line := range block {
   237  		if !isBlank(line) {
   238  			block[i] = line[n:]
   239  		}
   240  	}
   241  }
   242  
   243  // heading returns the trimmed line if it passes as a section heading;
   244  // otherwise it returns the empty string.
   245  func heading(line string) string {
   246  	line = strings.TrimSpace(line)
   247  	if len(line) == 0 {
   248  		return ""
   249  	}
   250  
   251  	// a heading must start with an uppercase letter
   252  	r, _ := utf8.DecodeRuneInString(line)
   253  	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   254  		return ""
   255  	}
   256  
   257  	// it must end in a letter or digit:
   258  	r, _ = utf8.DecodeLastRuneInString(line)
   259  	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   260  		return ""
   261  	}
   262  
   263  	// exclude lines with illegal characters. we allow "(),"
   264  	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
   265  		return ""
   266  	}
   267  
   268  	// allow "'" for possessive "'s" only
   269  	for b := line; ; {
   270  		i := strings.IndexRune(b, '\'')
   271  		if i < 0 {
   272  			break
   273  		}
   274  		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   275  			return "" // not followed by "s "
   276  		}
   277  		b = b[i+2:]
   278  	}
   279  
   280  	// allow "." when followed by non-space
   281  	for b := line; ; {
   282  		i := strings.IndexRune(b, '.')
   283  		if i < 0 {
   284  			break
   285  		}
   286  		if i+1 >= len(b) || b[i+1] == ' ' {
   287  			return "" // not followed by non-space
   288  		}
   289  		b = b[i+1:]
   290  	}
   291  
   292  	return line
   293  }
   294  
   295  type op int
   296  
   297  const (
   298  	opPara op = iota
   299  	opHead
   300  	opPre
   301  )
   302  
   303  type block struct {
   304  	op    op
   305  	lines []string
   306  }
   307  
   308  func blocks(text string) []block {
   309  	var (
   310  		out  []block
   311  		para []string
   312  
   313  		lastWasBlank   = false
   314  		lastWasHeading = false
   315  	)
   316  
   317  	close := func() {
   318  		if para != nil {
   319  			out = append(out, block{opPara, para})
   320  			para = nil
   321  		}
   322  	}
   323  
   324  	lines := strings.SplitAfter(text, "\n")
   325  	unindent(lines)
   326  	for i := 0; i < len(lines); {
   327  		line := lines[i]
   328  		if isBlank(line) {
   329  			// close paragraph
   330  			close()
   331  			i++
   332  			lastWasBlank = true
   333  			continue
   334  		}
   335  		if indentLen(line) > 0 {
   336  			// close paragraph
   337  			close()
   338  
   339  			// count indented or blank lines
   340  			j := i + 1
   341  			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   342  				j++
   343  			}
   344  			// but not trailing blank lines
   345  			for j > i && isBlank(lines[j-1]) {
   346  				j--
   347  			}
   348  			pre := lines[i:j]
   349  			i = j
   350  
   351  			unindent(pre)
   352  
   353  			// put those lines in a pre block
   354  			out = append(out, block{opPre, pre})
   355  			lastWasHeading = false
   356  			continue
   357  		}
   358  
   359  		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   360  			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   361  			// current line is non-blank, surrounded by blank lines
   362  			// and the next non-blank line is not indented: this
   363  			// might be a heading.
   364  			if head := heading(line); head != "" {
   365  				close()
   366  				out = append(out, block{opHead, []string{head}})
   367  				i += 2
   368  				lastWasHeading = true
   369  				continue
   370  			}
   371  		}
   372  
   373  		// open paragraph
   374  		lastWasBlank = false
   375  		lastWasHeading = false
   376  		para = append(para, lines[i])
   377  		i++
   378  	}
   379  	close()
   380  
   381  	return out
   382  }