github.com/errata-ai/vale/v3@v3.4.2/internal/core/format.go (about) 1 package core 2 3 import ( 4 "path/filepath" 5 "regexp" 6 "strings" 7 ) 8 9 // CommentsByNormedExt determines what parts of a file we should lint -- e.g., 10 // we only want to lint // or /* comments in a C++ file. Multiple formats are 11 // mapped to a single extension (e.g., .java -> .c) because many languages use 12 // the same comment delimiters. 13 var CommentsByNormedExt = map[string]map[string]string{ 14 ".c": { 15 "inline": `(?:^|\s)(?:(//.+)|(/\*.+\*/))`, 16 "blockStart": `(/\*.*)`, 17 "blockEnd": `(.*\*/)`, 18 }, 19 ".clj": { 20 "inline": `(;+.+)`, 21 "blockStart": `$^`, 22 "blockEnd": `$^`, 23 }, 24 ".css": { 25 "inline": `(/\*.+\*/)`, 26 "blockStart": `(/\*.*)`, 27 "blockEnd": `(.*\*/)`, 28 }, 29 ".rs": { 30 "inline": `(//.+)`, 31 "blockStart": `$^`, 32 "blockEnd": `$^`, 33 }, 34 ".r": { 35 "inline": `(#.+)`, 36 "blockStart": `$^`, 37 "blockEnd": `$^`, 38 }, 39 ".py": { 40 "inline": `(#.*)|('{3}.+'{3})|("{3}.+"{3})`, 41 "blockStart": `(?m)^((?:\s{4,})?[r]?["']{3}.*)$`, 42 "blockEnd": `(.*["']{3})`, 43 }, 44 ".ps1": { 45 "inline": `(#.+)`, 46 "blockStart": `(<#.*)`, 47 "blockEnd": `(.*#>)`, 48 }, 49 ".php": { 50 "inline": `(//.+)|(/\*.+\*/)|(#.+)`, 51 "blockStart": `(/\*.*)`, 52 "blockEnd": `(.*\*/)`, 53 }, 54 ".lua": { 55 "inline": `(-- .+)`, 56 "blockStart": `(-{2,3}\[\[.*)`, 57 "blockEnd": `(.*\]\])`, 58 }, 59 ".hs": { 60 "inline": `(-- .+)`, 61 "blockStart": `(\{-.*)`, 62 "blockEnd": `(.*-\})`, 63 }, 64 ".rb": { 65 "inline": `(#.+)`, 66 "blockStart": `(^=begin)`, 67 "blockEnd": `(^=end)`, 68 }, 69 ".jl": { 70 "inline": `(# .+)`, 71 "blockStart": `(^#=)|(^(?:@doc )?(?:raw)?["']{3}.*)`, 72 "blockEnd": `(^=#)|(.*["']{3})`, 73 }, 74 } 75 76 // FormatByExtension associates a file extension with its "normed" extension 77 // and its format (markup, code or text). 
var FormatByExtension = map[string][]string{
	// Each key is a regular expression tested against a dotted extension
	// (e.g. ".md"); each value is {normed extension, format}.
	`\.(?:[rc]?py[3w]?|[Ss][Cc]onstruct)$`:        {".py", "code"},
	`\.(?:adoc|asciidoc|asc)$`:                    {".adoc", "markup"},
	`\.(?:cpp|cc|c|cp|cxx|c\+\+|h|hpp|h\+\+)$`:    {".c", "code"},
	`\.(?:cs|csx)$`:                               {".c", "code"},
	`\.(?:clj|cljs|cljc|cljd)$`:                   {".clj", "code"},
	`\.(?:css)$`:                                  {".css", "code"},
	`\.(?:go)$`:                                   {".c", "code"},
	`\.(?:html|htm|shtml|xhtml)$`:                 {".html", "markup"},
	`\.(?:rb|Gemfile|Rakefile|Brewfile|gemspec)$`: {".rb", "code"},
	`\.(?:java|bsh)$`:                             {".c", "code"},
	`\.(?:js|jsx)$`:                               {".c", "code"},
	`\.(?:lua)$`:                                  {".lua", "code"},
	`\.(?:md|mdown|markdown|markdn)$`:             {".md", "markup"},
	`\.(?:php)$`:                                  {".php", "code"},
	`\.(?:pl|pm|pod)$`:                            {".r", "code"},
	`\.(?:ps1|psm1|psd1)$`:                        {".ps1", "code"},
	`\.(?:r|R)$`:                                  {".r", "code"},
	`\.(?:rs)$`:                                   {".rs", "code"},
	`\.(?:rst|rest)$`:                             {".rst", "markup"},
	`\.(?:swift)$`:                                {".c", "code"},
	`\.(?:ts|tsx)$`:                               {".c", "code"},
	`\.(?:txt)$`:                                  {".txt", "text"},
	`\.(?:sass|less)$`:                            {".c", "code"},
	`\.(?:scala|sbt)$`:                            {".c", "code"},
	`\.(?:hs)$`:                                   {".hs", "code"},
	`\.(?:xml)$`:                                  {".xml", "markup"},
	`\.(?:dita)$`:                                 {".dita", "markup"},
	`\.(?:org)$`:                                  {".org", "markup"},
	`\.(?:jl)$`:                                   {".jl", "code"},
	`\.(?:proto)$`:                                {".c", "code"},
}

// formatMatchers holds the compiled form of every pattern that was present in
// FormatByExtension at package initialization, keyed by the pattern text. It
// is only read after initialization.
var formatMatchers = compileFormatMatchers()

// compileFormatMatchers compiles each key of FormatByExtension exactly once so
// that lookups do not recompile the patterns on every call.
func compileFormatMatchers() map[string]*regexp.Regexp {
	compiled := make(map[string]*regexp.Regexp, len(FormatByExtension))
	for pattern := range FormatByExtension {
		re, err := regexp.Compile(pattern)
		if err != nil {
			// An invalid pattern is left out; matchesFormat then falls back
			// to regexp.MatchString, which reports "no match" for it.
			continue
		}
		compiled[pattern] = re
	}
	return compiled
}

// matchesFormat reports whether ext matches the FormatByExtension key pattern.
func matchesFormat(pattern, ext string) bool {
	if re, ok := formatMatchers[pattern]; ok {
		return re.MatchString(ext)
	}
	// The pattern was added to FormatByExtension after initialization (or is
	// invalid): compile it on demand. A compile error is deliberately treated
	// as "no match".
	matched, _ := regexp.MatchString(pattern, ext)
	return matched
}

// lookupFormat returns the FormatByExtension value whose pattern matches ext.
// The patterns are expected to be mutually exclusive: map iteration order is
// unspecified, so if two patterns matched the same extension either entry
// could be returned.
func lookupFormat(ext string) ([]string, bool) {
	for pattern, entry := range FormatByExtension {
		if matchesFormat(pattern, ext) {
			return entry, true
		}
	}
	return nil, false
}

// FormatFromExt takes a file path and returns the normed extension and format
// (markup, code, text or fragment) associated with the path's extension.
//
// mapping is consulted with the extension stripped of its dot (e.g. "mdx") and
// yields a replacement extension, also without a dot (e.g. "md"). When the
// original extension is a code format and the replacement is a markup format,
// the result is the dotted replacement with the format "fragment".
//
// If no pattern in FormatByExtension matches, it returns "unknown", "unknown".
func FormatFromExt(path string, mapping map[string]string) (string, string) {
	base := strings.Trim(filepath.Ext(path), ".")

	if format, found := mapping[base]; found {
		if getFormat("."+base) == "code" && getFormat("."+format) == "markup" {
			// NOTE: This is a special case of embedded markup within code.
			return "." + format, "fragment"
		}
		base = format
	}

	if entry, ok := lookupFormat("." + base); ok {
		return entry[0], entry[1]
	}
	return "unknown", "unknown"
}

// getFormat returns the format (markup, code or text) registered for the
// dotted extension ext, or "" when no pattern matches.
func getFormat(ext string) string {
	if entry, ok := lookupFormat(ext); ok {
		return entry[1]
	}
	return ""
}