github.com/btwiuse/jiri@v0.0.0-20191125065820-53353bcfef54/textutil/wrap_writer_test.go (about)

     1  // Copyright 2015 The Vanadium Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package textutil
     6  
     7  import (
     8  	"bytes"
     9  	"io"
    10  	"strings"
    11  	"testing"
    12  )
    13  
    14  type lp struct {
    15  	line, para string
    16  }
    17  
    18  var (
    19  	allIndents  = [][]int{nil, {}, {1}, {2}, {1, 2}, {2, 1}}
    20  	allIndents1 = [][]int{{1}, {2}, {1, 2}, {2, 1}}
    21  )
    22  
    23  func TestWrapWriter(t *testing.T) {
    24  	tests := []struct {
    25  		Width   int
    26  		Indents [][]int
    27  		In      string // See xlateIn for details on the format
    28  		Want    string // See xlateWant for details on the format
    29  	}{
    30  		// Completely blank input yields empty output.
    31  		{4, allIndents, "", ""},
    32  		{4, allIndents, " ", ""},
    33  		{4, allIndents, "  ", ""},
    34  		{4, allIndents, "   ", ""},
    35  		{4, allIndents, "    ", ""},
    36  		{4, allIndents, "     ", ""},
    37  		{4, allIndents, "      ", ""},
    38  		{4, allIndents, "F N  R   V    L     P      ", ""},
    39  		// Single words never get word-wrapped, even if they're long.
    40  		{4, allIndents, "a", "0a."},
    41  		{4, allIndents, "ab", "0ab."},
    42  		{4, allIndents, "abc", "0abc."},
    43  		{4, allIndents, "abcd", "0abcd."},
    44  		{4, allIndents, "abcde", "0abcde."},
    45  		{4, allIndents, "abcdef", "0abcdef."},
    46  		// Word-wrapping boundary conditions.
    47  		{4, allIndents, "abc ", "0abc."},
    48  		{4, allIndents, "abc  ", "0abc."},
    49  		{4, allIndents, "abcN", "0abc."},
    50  		{4, allIndents, "abcN ", "0abc."},
    51  		{4, allIndents, "abcd ", "0abcd."},
    52  		{4, allIndents, "abcd  ", "0abcd."},
    53  		{4, allIndents, "abcdN", "0abcd."},
    54  		{4, allIndents, "abcdN ", "0abcd."},
    55  		{4, [][]int{nil}, "a cd", "0a cd."},
    56  		{4, [][]int{nil}, "a cd ", "0a cd."},
    57  		{4, [][]int{nil}, "a cdN", "0a cd."},
    58  		{4, allIndents1, "a cd", "0a.1cd."},
    59  		{4, allIndents1, "a cd ", "0a.1cd."},
    60  		{4, allIndents1, "a cdN", "0a.1cd."},
    61  		{4, allIndents, "a cde", "0a.1cde."},
    62  		{4, allIndents, "a cde ", "0a.1cde."},
    63  		{4, allIndents, "a cdeN", "0a.1cde."},
    64  		{4, [][]int{nil}, "a  d", "0a  d."},
    65  		{4, [][]int{nil}, "a  d ", "0a  d."},
    66  		{4, [][]int{nil}, "a  dN", "0a  d."},
    67  		{4, allIndents1, "a  d", "0a.1d."},
    68  		{4, allIndents1, "a  d ", "0a.1d."},
    69  		{4, allIndents1, "a  dN", "0a.1d."},
    70  		{4, allIndents, "a  de", "0a.1de."},
    71  		{4, allIndents, "a  de ", "0a.1de."},
    72  		{4, allIndents, "a  deN", "0a.1de."},
    73  		// Multi-line word-wrapping boundary conditions.
    74  		{4, allIndents, "abc e", "0abc.1e."},
    75  		{4, allIndents, "abc.e", "0abc.1e."},
    76  		{4, allIndents, "abc efgh", "0abc.1efgh."},
    77  		{4, allIndents, "abc.efgh", "0abc.1efgh."},
    78  		{4, allIndents, "abc efghi", "0abc.1efghi."},
    79  		{4, allIndents, "abc.efghi", "0abc.1efghi."},
    80  		{4, [][]int{nil}, "abc e gh", "0abc.1e gh."},
    81  		{4, [][]int{nil}, "abc.e.gh", "0abc.1e gh."},
    82  		{4, allIndents1, "abc e gh", "0abc.1e.2gh."},
    83  		{4, allIndents1, "abc.e.gh", "0abc.1e.2gh."},
    84  		{4, allIndents, "abc e ghijk", "0abc.1e.2ghijk."},
    85  		{4, allIndents, "abc.e.ghijk", "0abc.1e.2ghijk."},
    86  		// Verbatim lines.
    87  		{4, allIndents, " b", "0 b."},
    88  		{4, allIndents, "  bc", "0  bc."},
    89  		{4, allIndents, "   bcd", "0   bcd."},
    90  		{4, allIndents, "    bcde", "0    bcde."},
    91  		{4, allIndents, "     bcdef", "0     bcdef."},
    92  		{4, allIndents, "      bcdefg", "0      bcdefg."},
    93  		{4, allIndents, " b de ghijk", "0 b de ghijk."},
    94  		// Verbatim lines before word-wrapped lines.
    95  		{4, allIndents, " b.vw yz", "0 b.1vw.2yz."},
    96  		{4, allIndents, "  bc.vw yz", "0  bc.1vw.2yz."},
    97  		{4, allIndents, "   bcd.vw yz", "0   bcd.1vw.2yz."},
    98  		{4, allIndents, "    bcde.vw yz", "0    bcde.1vw.2yz."},
    99  		{4, allIndents, "     bcdef.vw yz", "0     bcdef.1vw.2yz."},
   100  		{4, allIndents, "      bcdefg.vw yz", "0      bcdefg.1vw.2yz."},
   101  		{4, allIndents, " b de ghijk.vw yz", "0 b de ghijk.1vw.2yz."},
   102  		// Verbatim lines after word-wrapped lines.
   103  		{4, allIndents, "vw yz. b", "0vw.1yz.2 b."},
   104  		{4, allIndents, "vw yz.  bc", "0vw.1yz.2  bc."},
   105  		{4, allIndents, "vw yz.   bcd", "0vw.1yz.2   bcd."},
   106  		{4, allIndents, "vw yz.    bcde", "0vw.1yz.2    bcde."},
   107  		{4, allIndents, "vw yz.     bcdef", "0vw.1yz.2     bcdef."},
   108  		{4, allIndents, "vw yz.      bcdefg", "0vw.1yz.2      bcdefg."},
   109  		{4, allIndents, "vw yz. b de ghijk", "0vw.1yz.2 b de ghijk."},
   110  		// Verbatim lines between word-wrapped lines.
   111  		{4, allIndents, "vw yz. b.mn pq", "0vw.1yz.2 b.2mn.2pq."},
   112  		{4, allIndents, "vw yz.  bc.mn pq", "0vw.1yz.2  bc.2mn.2pq."},
   113  		{4, allIndents, "vw yz.   bcd.mn pq", "0vw.1yz.2   bcd.2mn.2pq."},
   114  		{4, allIndents, "vw yz.    bcde.mn pq", "0vw.1yz.2    bcde.2mn.2pq."},
   115  		{4, allIndents, "vw yz.     bcdef.mn pq", "0vw.1yz.2     bcdef.2mn.2pq."},
   116  		{4, allIndents, "vw yz.      bcdefg.mn pq", "0vw.1yz.2      bcdefg.2mn.2pq."},
   117  		{4, allIndents, "vw yz. b de ghijk.mn pq", "0vw.1yz.2 b de ghijk.2mn.2pq."},
   118  		// Multi-paragraphs via explicit U+2029, and multi-newline.
   119  		{4, allIndents, "ab de ghPij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   120  		{4, allIndents, "ab.de.ghPij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   121  		{4, allIndents, "ab de gh Pij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   122  		{4, allIndents, "ab.de.gh Pij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   123  		{4, allIndents, "ab de ghNNij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   124  		{4, allIndents, "ab.de.ghNNij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   125  		{4, allIndents, "ab de ghNNNij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   126  		{4, allIndents, "ab.de.ghNNNij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   127  		{4, allIndents, "ab de gh N Nij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   128  		{4, allIndents, "ab.de.gh N Nij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   129  		{4, allIndents, "ab de gh N N Nij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   130  		{4, allIndents, "ab.de.gh N N Nij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   131  		// Special-case /r/n is a single EOL, but may be combined.
   132  		{4, allIndents, "ab de ghRNij lm op", "0ab.1de.2gh.2ij.2lm.2op."},
   133  		{4, allIndents, "ab.de.ghRNij.lm.op", "0ab.1de.2gh.2ij.2lm.2op."},
   134  		{4, allIndents, "ab de gh RNij lm op", "0ab.1de.2gh.2ij.2lm.2op."},
   135  		{4, allIndents, "ab.de.gh RNij.lm.op", "0ab.1de.2gh.2ij.2lm.2op."},
   136  		{4, allIndents, "ab de ghRNRNij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   137  		{4, allIndents, "ab.de.ghRNRNij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   138  		{4, allIndents, "ab de gh RN RNij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   139  		{4, allIndents, "ab.de.gh RN RNij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   140  		{4, allIndents, "ab de ghR Nij lm op", "0ab.1de.2gh.:0ij.1lm.2op."},
   141  		{4, allIndents, "ab.de.ghR Nij.lm.op", "0ab.1de.2gh.:0ij.1lm.2op."},
   142  		// Line separator via explicit U+2028 ends lines, but not paragraphs.
   143  		{4, allIndents, "aLcd", "0a.1cd."},
   144  		{4, allIndents, "a Lcd", "0a.1cd."},
   145  		{4, allIndents, "aLLcd", "0a.1cd."},
   146  		{4, allIndents, "a LLcd", "0a.1cd."},
   147  		// 0 width ends up with one word per line, except verbatim lines.
   148  		{0, allIndents, "a c e", "0a.1c.2e."},
   149  		{0, allIndents, "a cd fghij", "0a.1cd.2fghij."},
   150  		{0, allIndents, "a. cd fghij.l n", "0a.1 cd fghij.2l.2n."},
   151  		// -1 width ends up with all words on same line, except verbatim lines.
   152  		{-1, allIndents, "a c e", "0a c e."},
   153  		{-1, allIndents, "a cd fghij", "0a cd fghij."},
   154  		{-1, allIndents, "a. cd fghij.l n", "0a.1 cd fghij.2l n."},
   155  	}
   156  	for _, test := range tests {
   157  		// Run with a variety of chunk sizes.
   158  		for _, sizes := range [][]int{nil, {1}, {2}, {1, 2}, {2, 1}} {
   159  			// Run with a variety of line terminators and paragraph separators.
   160  			for _, lp := range []lp{{}, {"\n", "\n"}, {"L", "P"}, {"LLL", "PPP"}} {
   161  				// Run with a variety of indents.
   162  				if len(test.Indents) == 0 {
   163  					t.Errorf("%d %q %q has no indents, use [][]int{nil} rather than nil", test.Width, test.In, test.Want)
   164  				}
   165  				for _, indents := range test.Indents {
   166  					var buf bytes.Buffer
   167  					w := newUTF8WrapWriter(t, &buf, test.Width, lp, indents)
   168  					wrapWriterWriteFlush(t, w, xlateIn(test.In), sizes)
   169  					if got, want := buf.String(), xlateWant(test.Want, lp, indents); got != want {
   170  						t.Errorf("%q sizes:%v lp:%q indents:%v got %q, want %q", test.In, sizes, lp, indents, got, want)
   171  					}
   172  				}
   173  			}
   174  		}
   175  	}
   176  }
   177  
   178  func TestWrapWriterForceVerbatim(t *testing.T) {
   179  	tests := []struct {
   180  		In   string // See xlateIn for details on the format
   181  		Want string // See xlateIn for details on the format
   182  	}{
   183  		{"", ""},
   184  		{"a", "a."},
   185  		{"a.", "a."},
   186  		{"ab", "ab."},
   187  		{"ab.", "ab."},
   188  		{"abc", "abc."},
   189  		{"abc.", "abc."},
   190  		{"a c", "a c."},
   191  		{"a c.", "a c."},
   192  		{"a cde", "a cde."},
   193  		{"a cde.", "a cde."},
   194  		{"a c e", "a c e."},
   195  		{"a c e.", "a c e."},
   196  		{"a c ef", "a c ef."},
   197  		{"a c ef.", "a c ef."},
   198  		{"a c  f", "a c  f."},
   199  		{"a c  f.", "a c  f."},
   200  		{"a    f", "a    f."},
   201  		{"a    f.", "a    f."},
   202  		{"a c e.g i k", "a c e.g i k."},
   203  		{"a c e.g i k.", "a c e.g i k."},
   204  	}
   205  	for _, test := range tests {
   206  		// Run with a variety of chunk sizes.
   207  		for _, sizes := range [][]int{nil, {1}, {2}, {1, 2}, {2, 1}} {
   208  			var buf bytes.Buffer
   209  			w := newUTF8WrapWriter(t, &buf, 1, lp{}, nil)
   210  			w.ForceVerbatim(true)
   211  			wrapWriterWriteFlush(t, w, xlateIn(test.In), sizes)
   212  			if got, want := buf.String(), xlateIn(test.Want); got != want {
   213  				t.Errorf("%q sizes:%v got %q, want %q", test.In, sizes, got, want)
   214  			}
   215  		}
   216  	}
   217  }
   218  
   219  // xlateIn translates our test.In pattern into an actual input string to feed
   220  // into the writer.  The point is to make it easy to specify the various control
   221  // sequences in a single character, so it's easier to understand.
   222  func xlateIn(text string) string {
   223  	text = strings.Replace(text, "F", "\f", -1)
   224  	text = strings.Replace(text, "N", "\n", -1)
   225  	text = strings.Replace(text, ".", "\n", -1) // Also allow . for easier reading
   226  	text = strings.Replace(text, "R", "\r", -1)
   227  	text = strings.Replace(text, "V", "\v", -1)
   228  	text = strings.Replace(text, "L", "\u2028", -1)
   229  	text = strings.Replace(text, "P", "\u2029", -1)
   230  	return text
   231  }
   232  
   233  // xlateWant translates our test.Want pattern into an actual expected string to
   234  // compare against the output.  The point is to make it easy to read and write
   235  // the expected patterns, and to make it easy to test various indents.
   236  func xlateWant(text string, lp lp, indents []int) string {
   237  	// Dot "." and colon ":" in the want string indicate line terminators and
   238  	// paragraph separators, respectively.
   239  	line := lp.line
   240  	if line == "" {
   241  		line = "\n"
   242  	}
   243  	text = strings.Replace(text, ".", line, -1)
   244  	para := lp.para
   245  	if para == "" {
   246  		para = "\n"
   247  	}
   248  	text = strings.Replace(text, ":", para, -1)
   249  	// The numbers in the want string indicate paragraph line numbers, to make it
   250  	// easier to automatically replace for various indent configurations.
   251  	switch len(indents) {
   252  	case 0:
   253  		text = strings.Replace(text, "0", "", -1)
   254  		text = strings.Replace(text, "1", "", -1)
   255  		text = strings.Replace(text, "2", "", -1)
   256  	case 1:
   257  		text = strings.Replace(text, "0", spaces(indents[0]), -1)
   258  		text = strings.Replace(text, "1", spaces(indents[0]), -1)
   259  		text = strings.Replace(text, "2", spaces(indents[0]), -1)
   260  	case 2:
   261  		text = strings.Replace(text, "0", spaces(indents[0]), -1)
   262  		text = strings.Replace(text, "1", spaces(indents[1]), -1)
   263  		text = strings.Replace(text, "2", spaces(indents[1]), -1)
   264  	case 3:
   265  		text = strings.Replace(text, "0", spaces(indents[0]), -1)
   266  		text = strings.Replace(text, "1", spaces(indents[1]), -1)
   267  		text = strings.Replace(text, "2", spaces(indents[2]), -1)
   268  	}
   269  	return text
   270  }
   271  
   272  func spaces(count int) string {
   273  	return strings.Repeat(" ", count)
   274  }
   275  
   276  func newUTF8WrapWriter(t testing.TB, buf io.Writer, width int, lp lp, indents []int) *WrapWriter {
   277  	w := NewUTF8WrapWriter(buf, width)
   278  	if lp.line != "" || lp.para != "" {
   279  		if err := w.SetLineTerminator(lp.line); err != nil {
   280  			t.Errorf("SetLineTerminator(%q) got %v, want nil", lp.line, err)
   281  		}
   282  		if err := w.SetParagraphSeparator(lp.para); err != nil {
   283  			t.Errorf("SetParagraphSeparator(%q) got %v, want nil", lp.para, err)
   284  		}
   285  	}
   286  	if indents != nil {
   287  		indentStrs := make([]string, len(indents))
   288  		for ix, indent := range indents {
   289  			indentStrs[ix] = spaces(indent)
   290  		}
   291  		if err := w.SetIndents(indentStrs...); err != nil {
   292  			t.Errorf("SetIndents(%v) got %v, want nil", indentStrs, err)
   293  		}
   294  	}
   295  	return w
   296  }
   297  
   298  func wrapWriterWriteFlush(t testing.TB, w *WrapWriter, text string, sizes []int) {
   299  	// Write chunks of different sizes until we've exhausted the input.
   300  	remain := []byte(text)
   301  	for ix := 0; len(remain) > 0; ix++ {
   302  		var chunk []byte
   303  		chunk, remain = nextChunk(remain, sizes, ix)
   304  		got, err := w.Write(chunk)
   305  		if want := len(chunk); got != want || err != nil {
   306  			t.Errorf("%q Write(%q) got (%d,%v), want (%d,nil)", text, chunk, got, err, want)
   307  		}
   308  	}
   309  	// Flush the writer.
   310  	if err := w.Flush(); err != nil {
   311  		t.Errorf("%q Flush() got %v, want nil", text, err)
   312  	}
   313  }
   314  
   315  func benchUTF8WrapWriter(b *testing.B, width int, sizes []int) {
   316  	for i := 0; i < b.N; i++ {
   317  		var buf bytes.Buffer
   318  		w := newUTF8WrapWriter(b, &buf, width, lp{}, nil)
   319  		wrapWriterWriteFlush(b, w, benchText, sizes)
   320  	}
   321  }
   322  
   323  func BenchmarkUTF8WrapWriter_Sizes_0_Width_0(b *testing.B) {
   324  	benchUTF8WrapWriter(b, 0, nil)
   325  }
   326  func BenchmarkUTF8WrapWriter_Sizes_0_Width_10(b *testing.B) {
   327  	benchUTF8WrapWriter(b, 10, nil)
   328  }
   329  func BenchmarkUTF8WrapWriter_Sizes_0_Width_Inf(b *testing.B) {
   330  	benchUTF8WrapWriter(b, -1, nil)
   331  }
   332  
   333  func BenchmarkUTF8WrapWriter_Sizes_1_Width_0(b *testing.B) {
   334  	benchUTF8WrapWriter(b, 0, []int{1})
   335  }
   336  func BenchmarkUTF8WrapWriter_Sizes_1_Width_10(b *testing.B) {
   337  	benchUTF8WrapWriter(b, 10, []int{1})
   338  }
   339  func BenchmarkUTF8WrapWriter_Sizes_1_Width_Inf(b *testing.B) {
   340  	benchUTF8WrapWriter(b, -1, []int{1})
   341  }
   342  
   343  func BenchmarkUTF8WrapWriter_Sizes_1_2_3_Width_0(b *testing.B) {
   344  	benchUTF8WrapWriter(b, 0, []int{1, 2, 3})
   345  }
   346  func BenchmarkUTF8WrapWriter_Sizes_1_2_3_Width_10(b *testing.B) {
   347  	benchUTF8WrapWriter(b, 10, []int{1, 2, 3})
   348  }
   349  func BenchmarkUTF8WrapWriter_Sizes_1_2_3_Width_Inf(b *testing.B) {
   350  	benchUTF8WrapWriter(b, -1, []int{1, 2, 3})
   351  }