github.com/errata-ai/vale/v3@v3.4.2/internal/lint/comments.go (about) 1 package lint 2 3 import ( 4 "bufio" 5 "bytes" 6 "regexp" 7 "strings" 8 9 "github.com/errata-ai/vale/v3/internal/core" 10 ) 11 12 // Comment represents an in-code comment (line or block). 13 type Comment struct { 14 Text string 15 Line int 16 Offset int 17 Scope string 18 } 19 20 // NOTE: This is different from `internal/core/format.go` because we need to 21 // handle each comment type separately in order to strip the prefixes 22 // (e.g., "//" or "/*") from the matched text. 23 // 24 // It's also important to note that this is certainly the *wrong* way to do 25 // this. We should handle code the same way we do markup -- by offloading the 26 // parsing duties to dedicated libraries. 27 // 28 // In practice, the best option is probably to use `tree-sitter` (see the 29 // relevant branch). However, the dependency is requires `CGO_ENABLED` and 30 // nearly triples the size of the compiled binary. So ... we'll see. 31 var patterns = map[string]map[string][]*regexp.Regexp{ 32 ".c": { 33 "inline": []*regexp.Regexp{ 34 regexp.MustCompile(`(?s)/\*(.+)\*/`), 35 regexp.MustCompile(`(?s)/{2}(.+)`), 36 }, 37 "blockStart": []*regexp.Regexp{ 38 regexp.MustCompile(`(?ms)/\*(.+)`), 39 }, 40 "blockEnd": []*regexp.Regexp{ 41 regexp.MustCompile(`(.*\*/)`), 42 }, 43 }, 44 ".clj": { 45 "inline": []*regexp.Regexp{ 46 regexp.MustCompile(`(?s);+(.+)`), 47 }, 48 "blockStart": []*regexp.Regexp{}, 49 "blockEnd": []*regexp.Regexp{}, 50 }, 51 ".css": { 52 "inline": []*regexp.Regexp{ 53 regexp.MustCompile(`(?s)/\*(.+)\*/`), 54 }, 55 "blockStart": []*regexp.Regexp{ 56 regexp.MustCompile(`(?ms)/\*(.+)`), 57 }, 58 "blockEnd": []*regexp.Regexp{ 59 regexp.MustCompile(`(.*\*/)`), 60 }, 61 }, 62 ".rs": { 63 "inline": []*regexp.Regexp{ 64 regexp.MustCompile(`(?s)/{3}!(.+)`), 65 regexp.MustCompile(`(?s)/{3}(.+)`), 66 regexp.MustCompile(`(?s)/{2}(.+)`), 67 }, 68 "blockStart": []*regexp.Regexp{}, 69 "blockEnd": []*regexp.Regexp{}, 70 }, 71 ".r": { 72 "inline": []*regexp.Regexp{ 73 regexp.MustCompile(`(?s)#(.+)`), 74 }, 75 "blockStart": []*regexp.Regexp{}, 76 "blockEnd": []*regexp.Regexp{}, 77 }, 78 ".php": { 79 "inline": []*regexp.Regexp{ 80 regexp.MustCompile(`(?s)/\*(.+)\*/`), 81 regexp.MustCompile(`(?s)#(.+)`), 82 regexp.MustCompile(`(?s)/{2}(.+)`), 83 }, 84 "blockStart": []*regexp.Regexp{ 85 regexp.MustCompile(`(?ms)/\*(.+)`), 86 }, 87 "blockEnd": []*regexp.Regexp{ 88 regexp.MustCompile(`(.*\*/)`), 89 }, 90 }, 91 ".py": { 92 "inline": []*regexp.Regexp{ 93 regexp.MustCompile(`(?s)#(.+)`), 94 regexp.MustCompile(`"""(.+)"""`), 95 regexp.MustCompile(`'''(.+)'''`), 96 }, 97 "blockStart": []*regexp.Regexp{ 98 regexp.MustCompile(`(?ms)^(?:\s{4,})?r?["']{3}(.+)$`), 99 }, 100 "blockEnd": []*regexp.Regexp{ 101 regexp.MustCompile(`(.*["']{3})`), 102 }, 103 }, 104 ".rb": { 105 "inline": []*regexp.Regexp{ 106 regexp.MustCompile(`(?s)#(.+)`), 107 }, 108 "blockStart": []*regexp.Regexp{ 109 regexp.MustCompile(`(?ms)^=begin(.+)`), 110 }, 111 "blockEnd": []*regexp.Regexp{ 112 regexp.MustCompile(`(^=end)`), 113 }, 114 }, 115 ".lua": { 116 "inline": []*regexp.Regexp{ 117 regexp.MustCompile(`(?s)-- (.+)`), 118 }, 119 "blockStart": []*regexp.Regexp{ 120 regexp.MustCompile(`(?ms)^-{2,3}\[\[(.*)`), 121 }, 122 "blockEnd": []*regexp.Regexp{ 123 regexp.MustCompile(`(.*\]\])`), 124 }, 125 }, 126 ".hs": { 127 "inline": []*regexp.Regexp{ 128 regexp.MustCompile(`(?s)-- (.+)`), 129 }, 130 "blockStart": []*regexp.Regexp{ 131 regexp.MustCompile(`(?ms)^\{-.(.*)`), 132 }, 133 "blockEnd": []*regexp.Regexp{ 134 regexp.MustCompile(`(.*-\})`), 135 }, 136 }, 137 ".jl": { 138 "inline": []*regexp.Regexp{ 139 regexp.MustCompile(`(?s)#(.+)`), 140 }, 141 "blockStart": []*regexp.Regexp{ 142 regexp.MustCompile(`(?ms)^(^#=)`), 143 regexp.MustCompile(`(?ms)^(?:@doc )?(?:raw)?["']{3}(.+)`), 144 }, 145 "blockEnd": []*regexp.Regexp{ 146 regexp.MustCompile(`(^=#)`), 147 regexp.MustCompile(`(.*["']{3})`), 148 }, 149 }, 150 ".ps1": { 151 "inline": []*regexp.Regexp{ 152 regexp.MustCompile(`(?s)#(.+)`), 153 }, 154 "blockStart": []*regexp.Regexp{ 155 regexp.MustCompile(`(?ms)^(?:<#)(.+)`), 156 }, 157 "blockEnd": []*regexp.Regexp{ 158 regexp.MustCompile(`(.*#>)`), 159 }, 160 }, 161 } 162 163 func trimLeading(lang, line string) string { 164 if core.StringInSlice(lang, []string{".jl"}) { 165 return line 166 } 167 return strings.TrimLeft(line, " ") 168 } 169 170 func getSubMatch(r *regexp.Regexp, s string) string { 171 matches := r.FindStringSubmatch(s) 172 for i, m := range matches { 173 if i > 0 && m != "" { 174 return m 175 } 176 } 177 return "" 178 } 179 180 func padding(line string) int { 181 return len(line) - len(strings.TrimLeft(line, " ")) 182 } 183 184 func doMatch(p []*regexp.Regexp, line string) string { 185 for _, r := range p { 186 if m := getSubMatch(r, line); m != "" { 187 return m 188 } 189 } 190 return "" 191 } 192 193 func getPatterns(ext string) map[string][]*regexp.Regexp { 194 for r, f := range core.FormatByExtension { 195 m, _ := regexp.MatchString(r, ext) 196 if m { 197 return patterns[f[0]] 198 } 199 } 200 return map[string][]*regexp.Regexp{} 201 } 202 203 func getComments(content, ext string) []Comment { 204 var comments []Comment 205 var lines, start int 206 var inBlock, ignore bool 207 var block bytes.Buffer 208 209 scanner := bufio.NewScanner(strings.NewReader(content)) 210 211 byLang := getPatterns(ext) 212 if len(byLang) == 0 { 213 return comments 214 } 215 216 scanner.Split(core.SplitLines) 217 for scanner.Scan() { 218 line := scanner.Text() + "\n" 219 220 lines++ 221 if inBlock { 222 // We're in a block comment. 223 if match := doMatch(byLang["blockEnd"], line); len(match) > 0 { 224 // We've found the end of the block. 225 226 comments = append(comments, Comment{ 227 Text: block.String(), 228 Line: start, 229 Offset: padding(line), 230 Scope: "text.comment.block", 231 }) 232 233 block.Reset() 234 inBlock = false 235 } else { 236 block.WriteString(trimLeading(ext, line)) 237 } 238 } else if match := doMatch(byLang["inline"], line); len(match) > 0 { 239 // We've found an inline comment. 240 // 241 // We need padding here in order to calculate the column 242 // span because, for example, a line like 'print("foo") # ...' 243 // will be condensed to '# ...'. 244 comments = append(comments, Comment{ 245 Text: match, 246 Line: lines, 247 Offset: strings.Index(line, match), 248 Scope: "text.comment.line", 249 }) 250 } else if match = doMatch(byLang["blockStart"], line); len(match) > 0 && !ignore { 251 // We've found the start of a block comment. 252 block.WriteString(match) 253 start = lines 254 inBlock = true 255 } else if match = doMatch(byLang["blockEnd"], line); len(match) > 0 { 256 ignore = !ignore 257 } 258 } 259 260 return comments 261 }