github.com/errata-ai/vale/v3@v3.4.2/internal/core/format.go (about)

     1  package core
     2  
import (
	"path/filepath"
	"regexp"
	"strings"
	"sync"
)
     8  
     9  // CommentsByNormedExt determines what parts of a file we should lint -- e.g.,
    10  // we only want to lint // or /* comments in a C++ file. Multiple formats are
    11  // mapped to a single extension (e.g., .java -> .c) because many languages use
    12  // the same comment delimiters.
    13  var CommentsByNormedExt = map[string]map[string]string{
    14  	".c": {
    15  		"inline":     `(?:^|\s)(?:(//.+)|(/\*.+\*/))`,
    16  		"blockStart": `(/\*.*)`,
    17  		"blockEnd":   `(.*\*/)`,
    18  	},
    19  	".clj": {
    20  		"inline":     `(;+.+)`,
    21  		"blockStart": `$^`,
    22  		"blockEnd":   `$^`,
    23  	},
    24  	".css": {
    25  		"inline":     `(/\*.+\*/)`,
    26  		"blockStart": `(/\*.*)`,
    27  		"blockEnd":   `(.*\*/)`,
    28  	},
    29  	".rs": {
    30  		"inline":     `(//.+)`,
    31  		"blockStart": `$^`,
    32  		"blockEnd":   `$^`,
    33  	},
    34  	".r": {
    35  		"inline":     `(#.+)`,
    36  		"blockStart": `$^`,
    37  		"blockEnd":   `$^`,
    38  	},
    39  	".py": {
    40  		"inline":     `(#.*)|('{3}.+'{3})|("{3}.+"{3})`,
    41  		"blockStart": `(?m)^((?:\s{4,})?[r]?["']{3}.*)$`,
    42  		"blockEnd":   `(.*["']{3})`,
    43  	},
    44  	".ps1": {
    45  		"inline":     `(#.+)`,
    46  		"blockStart": `(<#.*)`,
    47  		"blockEnd":   `(.*#>)`,
    48  	},
    49  	".php": {
    50  		"inline":     `(//.+)|(/\*.+\*/)|(#.+)`,
    51  		"blockStart": `(/\*.*)`,
    52  		"blockEnd":   `(.*\*/)`,
    53  	},
    54  	".lua": {
    55  		"inline":     `(-- .+)`,
    56  		"blockStart": `(-{2,3}\[\[.*)`,
    57  		"blockEnd":   `(.*\]\])`,
    58  	},
    59  	".hs": {
    60  		"inline":     `(-- .+)`,
    61  		"blockStart": `(\{-.*)`,
    62  		"blockEnd":   `(.*-\})`,
    63  	},
    64  	".rb": {
    65  		"inline":     `(#.+)`,
    66  		"blockStart": `(^=begin)`,
    67  		"blockEnd":   `(^=end)`,
    68  	},
    69  	".jl": {
    70  		"inline":     `(# .+)`,
    71  		"blockStart": `(^#=)|(^(?:@doc )?(?:raw)?["']{3}.*)`,
    72  		"blockEnd":   `(^=#)|(.*["']{3})`,
    73  	},
    74  }
    75  
    76  // FormatByExtension associates a file extension with its "normed" extension
    77  // and its format (markup, code or text).
    78  var FormatByExtension = map[string][]string{
    79  	`\.(?:[rc]?py[3w]?|[Ss][Cc]onstruct)$`:        {".py", "code"},
    80  	`\.(?:adoc|asciidoc|asc)$`:                    {".adoc", "markup"},
    81  	`\.(?:cpp|cc|c|cp|cxx|c\+\+|h|hpp|h\+\+)$`:    {".c", "code"},
    82  	`\.(?:cs|csx)$`:                               {".c", "code"},
    83  	`\.(?:clj|cljs|cljc|cljd)$`:                   {".clj", "code"},
    84  	`\.(?:css)$`:                                  {".css", "code"},
    85  	`\.(?:go)$`:                                   {".c", "code"},
    86  	`\.(?:html|htm|shtml|xhtml)$`:                 {".html", "markup"},
    87  	`\.(?:rb|Gemfile|Rakefile|Brewfile|gemspec)$`: {".rb", "code"},
    88  	`\.(?:java|bsh)$`:                             {".c", "code"},
    89  	`\.(?:js|jsx)$`:                               {".c", "code"},
    90  	`\.(?:lua)$`:                                  {".lua", "code"},
    91  	`\.(?:md|mdown|markdown|markdn)$`:             {".md", "markup"},
    92  	`\.(?:php)$`:                                  {".php", "code"},
    93  	`\.(?:pl|pm|pod)$`:                            {".r", "code"},
    94  	`\.(?:ps1|psm1|psd1)$`:                        {".ps1", "code"},
    95  	`\.(?:r|R)$`:                                  {".r", "code"},
    96  	`\.(?:rs)$`:                                   {".rs", "code"},
    97  	`\.(?:rst|rest)$`:                             {".rst", "markup"},
    98  	`\.(?:swift)$`:                                {".c", "code"},
    99  	`\.(?:ts|tsx)$`:                               {".c", "code"},
   100  	`\.(?:txt)$`:                                  {".txt", "text"},
   101  	`\.(?:sass|less)$`:                            {".c", "code"},
   102  	`\.(?:scala|sbt)$`:                            {".c", "code"},
   103  	`\.(?:hs)$`:                                   {".hs", "code"},
   104  	`\.(?:xml)$`:                                  {".xml", "markup"},
   105  	`\.(?:dita)$`:                                 {".dita", "markup"},
   106  	`\.(?:org)$`:                                  {".org", "markup"},
   107  	`\.(?:jl)$`:                                   {".jl", "code"},
   108  	`\.(?:proto)$`:                                {".c", "code"},
   109  }
   110  
   111  // FormatFromExt takes a file extension and returns its [normExt, format]
   112  // list, if supported.
   113  func FormatFromExt(path string, mapping map[string]string) (string, string) {
   114  	base := strings.Trim(filepath.Ext(path), ".")
   115  	kind := getFormat("." + base)
   116  
   117  	if format, found := mapping[base]; found {
   118  		if kind == "code" && getFormat("."+format) == "markup" {
   119  			// NOTE: This is a special case of embedded markup within code.
   120  			return "." + format, "fragment"
   121  		}
   122  		base = format
   123  	}
   124  
   125  	base = "." + base
   126  	for r, f := range FormatByExtension {
   127  		m, _ := regexp.MatchString(r, base)
   128  		if m {
   129  			return f[0], f[1]
   130  		}
   131  	}
   132  
   133  	return "unknown", "unknown"
   134  }
   135  
   136  func getFormat(ext string) string {
   137  	for r, f := range FormatByExtension {
   138  		m, _ := regexp.MatchString(r, ext)
   139  		if m {
   140  			return f[1]
   141  		}
   142  	}
   143  	return ""
   144  }