github.com/errata-ai/vale/v3@v3.4.2/internal/core/markup.go (about)

     1  // The MIT License (MIT)
     2  
     3  // Copyright (c) 2016 Write.as
     4  
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  
    12  // The above copyright notice and this permission notice shall be included in all
    13  // copies or substantial portions of the Software.
    14  
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    21  // SOFTWARE.
    22  
    23  package core
    24  
    25  import (
    26  	"regexp"
    27  )
    28  
    29  var (
    30  	listLeadersReg = regexp.MustCompile(`(?m)^([\s\t]*)([\*\-\+]|\d\.)\s+`)
    31  
    32  	headerReg = regexp.MustCompile(`\n={2,}`)
    33  	strikeReg = regexp.MustCompile(`~~`)
    34  	codeReg   = regexp.MustCompile("`{3}" + `.*\n`)
    35  
    36  	htmlReg         = regexp.MustCompile("<(.*?)>")
    37  	emphReg         = regexp.MustCompile(`\*\*([^*]+)\*\*`)
    38  	emphReg2        = regexp.MustCompile(`\*([^*]+)\*`)
    39  	emphReg3        = regexp.MustCompile(`__([^_]+)__`)
    40  	emphReg4        = regexp.MustCompile(`_([^_]+)_`)
    41  	setextHeaderReg = regexp.MustCompile(`^[=\-]{2,}\s*$`)
    42  	footnotesReg    = regexp.MustCompile(`\[\^.+?\](\: .*?$)?`)
    43  	footnotes2Reg   = regexp.MustCompile(`\s{0,2}\[.*?\]: .*?$`)
    44  	imagesReg       = regexp.MustCompile(`\!\[(.*?)\]\s?[\[\(].*?[\]\)]`)
    45  	linksReg        = regexp.MustCompile(`\[(.*?)\][\[\(].*?[\]\)]`)
    46  	blockquoteReg   = regexp.MustCompile(`>\s*`)
    47  	refLinkReg      = regexp.MustCompile(`^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$`)
    48  	atxHeaderReg    = regexp.MustCompile(`(?m)^\#{1,6}\s*([^#]+)\s*(\#{1,6})?$`)
    49  	atxHeaderReg2   = regexp.MustCompile(`([\*_]{1,3})(\S.*?\S)?P1`)
    50  	atxHeaderReg3   = regexp.MustCompile("(?m)(`{3,})" + `(.*?)?P1`)
    51  	atxHeaderReg4   = regexp.MustCompile(`^-{3,}\s*$`)
    52  	atxHeaderReg5   = regexp.MustCompile("`(.+?)`")
    53  	atxHeaderReg6   = regexp.MustCompile(`\n{2,}`)
    54  )
    55  
    56  // stripMarkdown returns the given string sans any Markdown.
    57  // Where necessary, elements are replaced with their best textual forms, so
    58  // for example, hyperlinks are stripped of their URL and become only the link
    59  // text, and images lose their URL and become only the alt text.
    60  func stripMarkdown(s string) string {
    61  	res := s
    62  	res = listLeadersReg.ReplaceAllString(res, "$1")
    63  
    64  	res = headerReg.ReplaceAllString(res, "\n")
    65  	res = strikeReg.ReplaceAllString(res, "")
    66  	res = codeReg.ReplaceAllString(res, "")
    67  
    68  	res = emphReg.ReplaceAllString(res, "$1")
    69  	res = emphReg2.ReplaceAllString(res, "$1")
    70  	res = emphReg3.ReplaceAllString(res, "$1")
    71  	res = emphReg4.ReplaceAllString(res, "$1")
    72  	res = htmlReg.ReplaceAllString(res, "$1")
    73  	res = setextHeaderReg.ReplaceAllString(res, "")
    74  	res = footnotesReg.ReplaceAllString(res, "")
    75  	res = footnotes2Reg.ReplaceAllString(res, "")
    76  	res = imagesReg.ReplaceAllString(res, "$1")
    77  	res = linksReg.ReplaceAllString(res, "$1")
    78  	res = blockquoteReg.ReplaceAllString(res, "  ")
    79  	res = refLinkReg.ReplaceAllString(res, "")
    80  	res = atxHeaderReg.ReplaceAllString(res, "$1")
    81  	res = atxHeaderReg2.ReplaceAllString(res, "$2")
    82  	res = atxHeaderReg3.ReplaceAllString(res, "$2")
    83  	res = atxHeaderReg4.ReplaceAllString(res, "")
    84  	res = atxHeaderReg5.ReplaceAllString(res, "$1")
    85  	res = atxHeaderReg6.ReplaceAllString(res, "\n\n")
    86  	return res
    87  }