github.com/errata-ai/vale/v3@v3.4.2/internal/lint/comments.go (about)

     1  package lint
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"regexp"
     7  	"strings"
     8  
     9  	"github.com/errata-ai/vale/v3/internal/core"
    10  )
    11  
// Comment represents an in-code comment (line or block).
//
// The fields are populated by getComments:
//   - Text is the text captured by the matching comment pattern. For block
//     comments it is the text accumulated from the opening line up to, but
//     not including, the closing line.
//   - Line is the 1-based number of the line on which the comment starts.
//   - Offset is, for line comments, the byte index of Text within its line;
//     for block comments it is the number of leading spaces on the closing
//     line.
//   - Scope is "text.comment.line" or "text.comment.block".
type Comment struct {
	Text   string
	Line   int
	Offset int
	Scope  string
}
    19  
    20  // NOTE: This is different from `internal/core/format.go` because we need to
    21  // handle each comment type separately in order to strip the prefixes
    22  // (e.g., "//" or "/*") from the matched text.
    23  //
    24  // It's also important to note that this is certainly the *wrong* way to do
    25  // this. We should handle code the same way we do markup -- by offloading the
    26  // parsing duties to dedicated libraries.
    27  //
    28  // In practice, the best option is probably to use `tree-sitter` (see the
    29  // relevant branch). However, the dependency requires `CGO_ENABLED` and
    30  // nearly triples the size of the compiled binary. So ... we'll see.
    31  var patterns = map[string]map[string][]*regexp.Regexp{
    32  	".c": {
    33  		"inline": []*regexp.Regexp{
    34  			regexp.MustCompile(`(?s)/\*(.+)\*/`),
    35  			regexp.MustCompile(`(?s)/{2}(.+)`),
    36  		},
    37  		"blockStart": []*regexp.Regexp{
    38  			regexp.MustCompile(`(?ms)/\*(.+)`),
    39  		},
    40  		"blockEnd": []*regexp.Regexp{
    41  			regexp.MustCompile(`(.*\*/)`),
    42  		},
    43  	},
    44  	".clj": {
    45  		"inline": []*regexp.Regexp{
    46  			regexp.MustCompile(`(?s);+(.+)`),
    47  		},
    48  		"blockStart": []*regexp.Regexp{},
    49  		"blockEnd":   []*regexp.Regexp{},
    50  	},
    51  	".css": {
    52  		"inline": []*regexp.Regexp{
    53  			regexp.MustCompile(`(?s)/\*(.+)\*/`),
    54  		},
    55  		"blockStart": []*regexp.Regexp{
    56  			regexp.MustCompile(`(?ms)/\*(.+)`),
    57  		},
    58  		"blockEnd": []*regexp.Regexp{
    59  			regexp.MustCompile(`(.*\*/)`),
    60  		},
    61  	},
    62  	".rs": {
    63  		"inline": []*regexp.Regexp{
    64  			regexp.MustCompile(`(?s)/{3}!(.+)`),
    65  			regexp.MustCompile(`(?s)/{3}(.+)`),
    66  			regexp.MustCompile(`(?s)/{2}(.+)`),
    67  		},
    68  		"blockStart": []*regexp.Regexp{},
    69  		"blockEnd":   []*regexp.Regexp{},
    70  	},
    71  	".r": {
    72  		"inline": []*regexp.Regexp{
    73  			regexp.MustCompile(`(?s)#(.+)`),
    74  		},
    75  		"blockStart": []*regexp.Regexp{},
    76  		"blockEnd":   []*regexp.Regexp{},
    77  	},
    78  	".php": {
    79  		"inline": []*regexp.Regexp{
    80  			regexp.MustCompile(`(?s)/\*(.+)\*/`),
    81  			regexp.MustCompile(`(?s)#(.+)`),
    82  			regexp.MustCompile(`(?s)/{2}(.+)`),
    83  		},
    84  		"blockStart": []*regexp.Regexp{
    85  			regexp.MustCompile(`(?ms)/\*(.+)`),
    86  		},
    87  		"blockEnd": []*regexp.Regexp{
    88  			regexp.MustCompile(`(.*\*/)`),
    89  		},
    90  	},
    91  	".py": {
    92  		"inline": []*regexp.Regexp{
    93  			regexp.MustCompile(`(?s)#(.+)`),
    94  			regexp.MustCompile(`"""(.+)"""`),
    95  			regexp.MustCompile(`'''(.+)'''`),
    96  		},
    97  		"blockStart": []*regexp.Regexp{
    98  			regexp.MustCompile(`(?ms)^(?:\s{4,})?r?["']{3}(.+)$`),
    99  		},
   100  		"blockEnd": []*regexp.Regexp{
   101  			regexp.MustCompile(`(.*["']{3})`),
   102  		},
   103  	},
   104  	".rb": {
   105  		"inline": []*regexp.Regexp{
   106  			regexp.MustCompile(`(?s)#(.+)`),
   107  		},
   108  		"blockStart": []*regexp.Regexp{
   109  			regexp.MustCompile(`(?ms)^=begin(.+)`),
   110  		},
   111  		"blockEnd": []*regexp.Regexp{
   112  			regexp.MustCompile(`(^=end)`),
   113  		},
   114  	},
   115  	".lua": {
   116  		"inline": []*regexp.Regexp{
   117  			regexp.MustCompile(`(?s)-- (.+)`),
   118  		},
   119  		"blockStart": []*regexp.Regexp{
   120  			regexp.MustCompile(`(?ms)^-{2,3}\[\[(.*)`),
   121  		},
   122  		"blockEnd": []*regexp.Regexp{
   123  			regexp.MustCompile(`(.*\]\])`),
   124  		},
   125  	},
   126  	".hs": {
   127  		"inline": []*regexp.Regexp{
   128  			regexp.MustCompile(`(?s)-- (.+)`),
   129  		},
   130  		"blockStart": []*regexp.Regexp{
   131  			regexp.MustCompile(`(?ms)^\{-.(.*)`),
   132  		},
   133  		"blockEnd": []*regexp.Regexp{
   134  			regexp.MustCompile(`(.*-\})`),
   135  		},
   136  	},
   137  	".jl": {
   138  		"inline": []*regexp.Regexp{
   139  			regexp.MustCompile(`(?s)#(.+)`),
   140  		},
   141  		"blockStart": []*regexp.Regexp{
   142  			regexp.MustCompile(`(?ms)^(^#=)`),
   143  			regexp.MustCompile(`(?ms)^(?:@doc )?(?:raw)?["']{3}(.+)`),
   144  		},
   145  		"blockEnd": []*regexp.Regexp{
   146  			regexp.MustCompile(`(^=#)`),
   147  			regexp.MustCompile(`(.*["']{3})`),
   148  		},
   149  	},
   150  	".ps1": {
   151  		"inline": []*regexp.Regexp{
   152  			regexp.MustCompile(`(?s)#(.+)`),
   153  		},
   154  		"blockStart": []*regexp.Regexp{
   155  			regexp.MustCompile(`(?ms)^(?:<#)(.+)`),
   156  		},
   157  		"blockEnd": []*regexp.Regexp{
   158  			regexp.MustCompile(`(.*#>)`),
   159  		},
   160  	},
   161  }
   162  
   163  func trimLeading(lang, line string) string {
   164  	if core.StringInSlice(lang, []string{".jl"}) {
   165  		return line
   166  	}
   167  	return strings.TrimLeft(line, " ")
   168  }
   169  
   170  func getSubMatch(r *regexp.Regexp, s string) string {
   171  	matches := r.FindStringSubmatch(s)
   172  	for i, m := range matches {
   173  		if i > 0 && m != "" {
   174  			return m
   175  		}
   176  	}
   177  	return ""
   178  }
   179  
   180  func padding(line string) int {
   181  	return len(line) - len(strings.TrimLeft(line, " "))
   182  }
   183  
   184  func doMatch(p []*regexp.Regexp, line string) string {
   185  	for _, r := range p {
   186  		if m := getSubMatch(r, line); m != "" {
   187  			return m
   188  		}
   189  	}
   190  	return ""
   191  }
   192  
   193  func getPatterns(ext string) map[string][]*regexp.Regexp {
   194  	for r, f := range core.FormatByExtension {
   195  		m, _ := regexp.MatchString(r, ext)
   196  		if m {
   197  			return patterns[f[0]]
   198  		}
   199  	}
   200  	return map[string][]*regexp.Regexp{}
   201  }
   202  
   203  func getComments(content, ext string) []Comment {
   204  	var comments []Comment
   205  	var lines, start int
   206  	var inBlock, ignore bool
   207  	var block bytes.Buffer
   208  
   209  	scanner := bufio.NewScanner(strings.NewReader(content))
   210  
   211  	byLang := getPatterns(ext)
   212  	if len(byLang) == 0 {
   213  		return comments
   214  	}
   215  
   216  	scanner.Split(core.SplitLines)
   217  	for scanner.Scan() {
   218  		line := scanner.Text() + "\n"
   219  
   220  		lines++
   221  		if inBlock {
   222  			// We're in a block comment.
   223  			if match := doMatch(byLang["blockEnd"], line); len(match) > 0 {
   224  				// We've found the end of the block.
   225  
   226  				comments = append(comments, Comment{
   227  					Text:   block.String(),
   228  					Line:   start,
   229  					Offset: padding(line),
   230  					Scope:  "text.comment.block",
   231  				})
   232  
   233  				block.Reset()
   234  				inBlock = false
   235  			} else {
   236  				block.WriteString(trimLeading(ext, line))
   237  			}
   238  		} else if match := doMatch(byLang["inline"], line); len(match) > 0 {
   239  			// We've found an inline comment.
   240  			//
   241  			// We need padding here in order to calculate the column
   242  			// span because, for example, a line like  'print("foo") # ...'
   243  			// will be condensed to '# ...'.
   244  			comments = append(comments, Comment{
   245  				Text:   match,
   246  				Line:   lines,
   247  				Offset: strings.Index(line, match),
   248  				Scope:  "text.comment.line",
   249  			})
   250  		} else if match = doMatch(byLang["blockStart"], line); len(match) > 0 && !ignore {
   251  			// We've found the start of a block comment.
   252  			block.WriteString(match)
   253  			start = lines
   254  			inBlock = true
   255  		} else if match = doMatch(byLang["blockEnd"], line); len(match) > 0 {
   256  			ignore = !ignore
   257  		}
   258  	}
   259  
   260  	return comments
   261  }