src.elv.sh@v0.21.0-dev.0.20240515223629-06979efb9a2a/pkg/md/fmt_test.go

src.elv.sh@v0.21.0-dev.0.20240515223629-06979efb9a2a/pkg/md/fmt_test.go (about)

     1  package md_test
     2  
     3  import (
     4  	"fmt"
     5  	"html"
     6  	"regexp"
     7  	"strings"
     8  	"testing"
     9  	"unicode/utf8"
    10  
    11  	"github.com/google/go-cmp/cmp"
    12  	. "src.elv.sh/pkg/md"
    13  	"src.elv.sh/pkg/testutil"
    14  	"src.elv.sh/pkg/wcwidth"
    15  )
    16  
    17  var supplementalFmtCases = []testCase{
    18  	{
    19  		Section:  "Fenced code blocks",
    20  		Name:     "Tilde fence with info starting with tilde",
    21  		Markdown: "~~~ ~`\n" + "~~~",
    22  	},
    23  	{
    24  		Section:  "Emphasis and strong emphasis",
    25  		Name:     "Space at start of content",
    26  		Markdown: "*&#32;x*",
    27  	},
    28  	{
    29  		Section:  "Emphasis and strong emphasis",
    30  		Name:     "Space at end of content",
    31  		Markdown: "*x&#32;*",
    32  	},
    33  	{
    34  		Section:  "Emphasis and strong emphasis",
    35  		Name:     "Emphasis opener after word before punctuation",
    36  		Markdown: "&#65;*!*",
    37  	},
    38  	{
    39  		Section:  "Emphasis and strong emphasis",
    40  		Name:     "Emphasis closer after punctuation before word",
    41  		Markdown: "*!*&#65;",
    42  	},
    43  	{
    44  		Section:  "Emphasis and strong emphasis",
    45  		Name:     "Space-only content",
    46  		Markdown: "*&#32;*",
    47  	},
    48  	{
    49  		Section:  "Links",
    50  		Name:     "Exclamation mark before link",
    51  		Markdown: `\![a](b)`,
    52  	},
    53  	{
    54  		Section:  "Links",
    55  		Name:     "Link title with both single and double quotes",
    56  		Markdown: `[a](b ('"))`,
    57  	},
    58  	{
    59  		Section:  "Links",
    60  		Name:     "Link title with fewer double quotes than single quotes and parens",
    61  		Markdown: `[a](b "\"''()")`,
    62  	},
    63  	{
    64  		Section:  "Links",
    65  		Name:     "Link title with fewer single quotes than double quotes and parens",
    66  		Markdown: `[a](b '\'""()')`,
    67  	},
    68  	{
    69  		Section:  "Links",
    70  		Name:     "Link title with fewer parens than single and double quotes",
    71  		Markdown: `[a](b (\(''""))`,
    72  	},
    73  	{
    74  		Section:  "Links",
    75  		Name:     "Newline in link destination",
    76  		Markdown: `[a](<&NewLine;>)`,
    77  	},
    78  	{
    79  		Section:  "Soft line breaks",
    80  		Name:     "Space at start of line",
    81  		Markdown: "&#32;foo",
    82  	},
    83  	{
    84  		Section:  "Soft line breaks",
    85  		Name:     "Space at end of line",
    86  		Markdown: "foo&#32;",
    87  	},
    88  }
    89  
    90  var fmtTestCases = concat(htmlTestCases, supplementalFmtCases)
    91  
    92  func TestFmtPreservesHTMLRender(t *testing.T) {
    93  	testutil.Set(t, &UnescapeHTML, html.UnescapeString)
    94  	for _, tc := range fmtTestCases {
    95  		t.Run(tc.testName(), func(t *testing.T) {
    96  			testFmtPreservesHTMLRender(t, tc.Markdown)
    97  		})
    98  	}
    99  }
   100  
   101  func FuzzFmtPreservesHTMLRender(f *testing.F) {
   102  	for _, tc := range fmtTestCases {
   103  		f.Add(tc.Markdown)
   104  	}
   105  	f.Fuzz(testFmtPreservesHTMLRender)
   106  }
   107  
   108  func testFmtPreservesHTMLRender(t *testing.T, original string) {
   109  	testFmtPreservesHTMLRenderModulo(t, original, 0, nil)
   110  }
   111  
   112  func TestReflowFmtPreservesHTMLRenderModuleWhitespaces(t *testing.T) {
   113  	testReflowFmt(t, testReflowFmtPreservesHTMLRenderModuloWhitespaces)
   114  }
   115  
   116  func FuzzReflowFmtPreservesHTMLRenderModuleWhitespaces(f *testing.F) {
   117  	fuzzReflowFmt(f, testReflowFmtPreservesHTMLRenderModuloWhitespaces)
   118  }
   119  
   120  var (
   121  	paragraph         = regexp.MustCompile(`(?s)<p>.*?</p>`)
   122  	whitespaceRun     = regexp.MustCompile(`[ \t\n]+`)
   123  	brWithWhitespaces = regexp.MustCompile(`[ \t\n]*<br />[ \t\n]*`)
   124  )
   125  
   126  func testReflowFmtPreservesHTMLRenderModuloWhitespaces(t *testing.T, original string, w int) {
   127  	if strings.Contains(original, "<p>") {
   128  		t.Skip("markdown contains <p>")
   129  	}
   130  	if strings.Contains(original, "</p>") {
   131  		t.Skip("markdown contains </p>")
   132  	}
   133  	testFmtPreservesHTMLRenderModulo(t, original, w, func(html string) string {
   134  		// Coalesce whitespaces in each paragraph.
   135  		return paragraph.ReplaceAllStringFunc(html, func(p string) string {
   136  			body := strings.Trim(p[3:len(p)-4], " \t\n")
   137  			// Convert each whitespace run to a single space.
   138  			body = whitespaceRun.ReplaceAllLiteralString(body, " ")
   139  			// Remove whitespaces around <br />.
   140  			body = brWithWhitespaces.ReplaceAllLiteralString(body, "<br />")
   141  			return "<p>" + body + "</p>"
   142  		})
   143  	})
   144  }
   145  
   146  func TestReflowFmtResultIsUnchangedUnderFmt(t *testing.T) {
   147  	testReflowFmt(t, testReflowFmtResultIsUnchangedUnderFmt)
   148  }
   149  
   150  func FuzzReflowFmtResultIsUnchangedUnderFmt(f *testing.F) {
   151  	fuzzReflowFmt(f, testReflowFmtResultIsUnchangedUnderFmt)
   152  }
   153  
   154  func testReflowFmtResultIsUnchangedUnderFmt(t *testing.T, original string, w int) {
   155  	reflowed := formatAndSkipIfUnsupported(t, original, w)
   156  	formatted := RenderString(reflowed, &FmtCodec{})
   157  	if reflowed != formatted {
   158  		t.Errorf("original:\n%s\nreflowed:\n%s\nformatted:\n%s"+
   159  			"markdown diff (-reflowed +formatted):\n%s",
   160  			hr+"\n"+original+hr, hr+"\n"+reflowed+hr, hr+"\n"+formatted+hr,
   161  			cmp.Diff(reflowed, formatted))
   162  	}
   163  }
   164  
   165  func TestReflowFmtResultFitsInWidth(t *testing.T) {
   166  	testReflowFmt(t, testReflowFmtResultFitsInWidth)
   167  }
   168  
   169  func FuzzReflowFmtResultFitsInWidth(f *testing.F) {
   170  	fuzzReflowFmt(f, testReflowFmtResultFitsInWidth)
   171  }
   172  
   173  var (
   174  	// Match all markers that can be written by FmtCodec.
   175  	markersRegexp  = regexp.MustCompile(`^ *(?:(?:[-*>]|[0-9]{1,9}[.)]) *)*`)
   176  	linkRegexp     = regexp.MustCompile(`\[.*\]\(.*\)`)
   177  	codeSpanRegexp = regexp.MustCompile("`.*`")
   178  )
   179  
   180  func testReflowFmtResultFitsInWidth(t *testing.T, original string, w int) {
   181  	if w <= 0 {
   182  		t.Skip("width <= 0")
   183  	}
   184  
   185  	var trace TraceCodec
   186  	Render(original, &trace)
   187  	for _, op := range trace.Ops() {
   188  		switch op.Type {
   189  		case OpHeading, OpCodeBlock, OpHTMLBlock:
   190  			t.Skipf("input contains unsupported block type %s", op.Type)
   191  		}
   192  	}
   193  
   194  	reflowed := formatAndSkipIfUnsupported(t, original, w)
   195  
   196  	for _, line := range strings.Split(reflowed, "\n") {
   197  		lineWidth := wcwidth.Of(line)
   198  		if lineWidth <= w {
   199  			continue
   200  		}
   201  		// Strip all markers
   202  		content := line[len(markersRegexp.FindString(line)):]
   203  		// Analyze whether the content is allowed to exceed width
   204  		switch {
   205  		case !strings.Contains(content, " "):
   206  		case strings.Contains(content, "<"):
   207  		case linkRegexp.MatchString(content):
   208  		case codeSpanRegexp.MatchString(content):
   209  		default:
   210  			t.Errorf("line length > %d: %q\nfull reflowed:\n%s",
   211  				w, line, hr+"\n"+reflowed+hr)
   212  		}
   213  	}
   214  }
   215  
   216  var widths = []int{20, 51, 80}
   217  
   218  func testReflowFmt(t *testing.T, test func(*testing.T, string, int)) {
   219  	for _, tc := range fmtTestCases {
   220  		for _, w := range widths {
   221  			t.Run(fmt.Sprintf("%s/Width %d", tc.testName(), w), func(t *testing.T) {
   222  				test(t, tc.Markdown, w)
   223  			})
   224  		}
   225  	}
   226  }
   227  
   228  func fuzzReflowFmt(f *testing.F, test func(*testing.T, string, int)) {
   229  	for _, tc := range fmtTestCases {
   230  		for _, w := range widths {
   231  			f.Add(tc.Markdown, w)
   232  		}
   233  	}
   234  	f.Fuzz(test)
   235  }
   236  
   237  func testFmtPreservesHTMLRenderModulo(t *testing.T, original string, w int, processHTML func(string) string) {
   238  	formatted := formatAndSkipIfUnsupported(t, original, w)
   239  	originalRender := RenderString(original, &HTMLCodec{})
   240  	formattedRender := RenderString(formatted, &HTMLCodec{})
   241  	if processHTML != nil {
   242  		originalRender = processHTML(originalRender)
   243  		formattedRender = processHTML(formattedRender)
   244  	}
   245  	if formattedRender != originalRender {
   246  		t.Errorf("original:\n%s\nformatted:\n%s\n"+
   247  			"markdown diff (-original +formatted):\n%s"+
   248  			"HTML diff (-original +formatted):\n%s"+
   249  			"ops diff (-original +formatted):\n%s",
   250  			hr+"\n"+original+hr, hr+"\n"+formatted+hr,
   251  			cmp.Diff(original, formatted),
   252  			cmp.Diff(originalRender, formattedRender),
   253  			cmp.Diff(RenderString(original, &TraceCodec{}), RenderString(formatted, &TraceCodec{})))
   254  	}
   255  }
   256  
   257  func formatAndSkipIfUnsupported(t *testing.T, original string, w int) string {
   258  	if !utf8.ValidString(original) {
   259  		t.Skipf("input is not valid UTF-8")
   260  	}
   261  	if strings.Contains(original, "\t") {
   262  		t.Skipf("input contains tab")
   263  	}
   264  	codec := &FmtCodec{Width: w}
   265  	formatted := RenderString(original, codec)
   266  	if u := codec.Unsupported(); u != nil {
   267  		t.Skipf("input uses unsupported feature: %v", u)
   268  	}
   269  	return formatted
   270  }