go4.org@v0.0.0-20230225012048-214862532bf5/bytereplacer/bytereplacer_test.go (about)

     1  /*
     2  Copyright 2015 The Perkeep Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package bytereplacer
    18  
    19  import (
    20  	"bytes"
    21  	"strings"
    22  	"testing"
    23  )
    24  
    25  var htmlEscaper = New(
    26  	"&", "&",
    27  	"<", "&lt;",
    28  	">", "&gt;",
    29  	`"`, "&quot;",
    30  	"'", "&apos;",
    31  )
    32  
    33  var htmlUnescaper = New(
    34  	"&amp;", "&",
    35  	"&lt;", "<",
    36  	"&gt;", ">",
    37  	"&quot;", `"`,
    38  	"&apos;", "'",
    39  )
    40  
    41  var capitalLetters = New("a", "A", "b", "B")
    42  
    43  func TestReplacer(t *testing.T) {
    44  	type testCase struct {
    45  		r       *Replacer
    46  		in, out string
    47  	}
    48  	var testCases []testCase
    49  
    50  	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
    51  	str := func(b byte) string {
    52  		return string([]byte{b})
    53  	}
    54  	var s []string
    55  
    56  	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
    57  	s = nil
    58  	for i := 0; i < 256; i++ {
    59  		s = append(s, str(byte(i)), str(byte(i+1)))
    60  	}
    61  	inc := New(s...)
    62  
    63  	// Test cases with 1-byte old strings, 1-byte new strings.
    64  	testCases = append(testCases,
    65  		testCase{capitalLetters, "brad", "BrAd"},
    66  		testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)},
    67  		testCase{capitalLetters, "", ""},
    68  
    69  		testCase{inc, "brad", "csbe"},
    70  		testCase{inc, "\x00\xff", "\x01\x00"},
    71  		testCase{inc, "", ""},
    72  
    73  		testCase{New("a", "1", "a", "2"), "brad", "br1d"},
    74  	)
    75  
    76  	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
    77  	s = nil
    78  	for i := 0; i < 256; i++ {
    79  		n := i + 1 - 'a'
    80  		if n < 1 {
    81  			n = 1
    82  		}
    83  		s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n))
    84  	}
    85  	repeat := New(s...)
    86  
    87  	// Test cases with 1-byte old strings, variable length new strings.
    88  	testCases = append(testCases,
    89  		testCase{htmlEscaper, "No changes", "No changes"},
    90  		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
    91  		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
    92  		testCase{htmlEscaper, "", ""},
    93  
    94  		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
    95  		testCase{repeat, "abba", "abbbba"},
    96  		testCase{repeat, "", ""},
    97  
    98  		testCase{New("a", "11", "a", "22"), "brad", "br11d"},
    99  	)
   100  
   101  	// The remaining test cases have variable length old strings.
   102  
   103  	testCases = append(testCases,
   104  		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
   105  		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
   106  		testCase{htmlUnescaper, "", ""},
   107  
   108  		testCase{New("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
   109  
   110  		testCase{New("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
   111  
   112  		testCase{New("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
   113  	)
   114  
   115  	// gen1 has multiple old strings of variable length. There is no
   116  	// overall non-empty common prefix, but some pairwise common prefixes.
   117  	gen1 := New(
   118  		"aaa", "3[aaa]",
   119  		"aa", "2[aa]",
   120  		"a", "1[a]",
   121  		"i", "i",
   122  		"longerst", "most long",
   123  		"longer", "medium",
   124  		"long", "short",
   125  		"xx", "xx",
   126  		"x", "X",
   127  		"X", "Y",
   128  		"Y", "Z",
   129  	)
   130  	testCases = append(testCases,
   131  		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
   132  		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
   133  		testCase{gen1, "xxxxx", "xxxxX"},
   134  		testCase{gen1, "XiX", "YiY"},
   135  		testCase{gen1, "", ""},
   136  	)
   137  
   138  	// gen2 has multiple old strings with no pairwise common prefix.
   139  	gen2 := New(
   140  		"roses", "red",
   141  		"violets", "blue",
   142  		"sugar", "sweet",
   143  	)
   144  	testCases = append(testCases,
   145  		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
   146  		testCase{gen2, "", ""},
   147  	)
   148  
   149  	// gen3 has multiple old strings with an overall common prefix.
   150  	gen3 := New(
   151  		"abracadabra", "poof",
   152  		"abracadabrakazam", "splat",
   153  		"abraham", "lincoln",
   154  		"abrasion", "scrape",
   155  		"abraham", "isaac",
   156  	)
   157  	testCases = append(testCases,
   158  		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
   159  		testCase{gen3, "abrasion abracad", "scrape abracad"},
   160  		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
   161  		testCase{gen3, "", ""},
   162  	)
   163  
   164  	// foo{1,2,3,4} have multiple old strings with an overall common prefix
   165  	// and 1- or 2- byte extensions from the common prefix.
   166  	foo1 := New(
   167  		"foo1", "A",
   168  		"foo2", "B",
   169  		"foo3", "C",
   170  	)
   171  	foo2 := New(
   172  		"foo1", "A",
   173  		"foo2", "B",
   174  		"foo31", "C",
   175  		"foo32", "D",
   176  	)
   177  	foo3 := New(
   178  		"foo11", "A",
   179  		"foo12", "B",
   180  		"foo31", "C",
   181  		"foo32", "D",
   182  	)
   183  	foo4 := New(
   184  		"foo12", "B",
   185  		"foo32", "D",
   186  	)
   187  	testCases = append(testCases,
   188  		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
   189  		testCase{foo1, "", ""},
   190  
   191  		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
   192  		testCase{foo2, "", ""},
   193  
   194  		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
   195  		testCase{foo3, "", ""},
   196  
   197  		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
   198  		testCase{foo4, "", ""},
   199  	)
   200  
   201  	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
   202  	allBytes := make([]byte, 256)
   203  	for i := range allBytes {
   204  		allBytes[i] = byte(i)
   205  	}
   206  	allString := string(allBytes)
   207  	genAll := New(
   208  		allString, "[all]",
   209  		"\xff", "[ff]",
   210  		"\x00", "[00]",
   211  	)
   212  	testCases = append(testCases,
   213  		testCase{genAll, allString, "[all]"},
   214  		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
   215  		testCase{genAll, "", ""},
   216  	)
   217  
   218  	// Test cases with empty old strings.
   219  
   220  	blankToX1 := New("", "X")
   221  	blankToX2 := New("", "X", "", "")
   222  	blankHighPriority := New("", "X", "o", "O")
   223  	blankLowPriority := New("o", "O", "", "X")
   224  	blankNoOp1 := New("", "")
   225  	blankNoOp2 := New("", "", "", "A")
   226  	blankFoo := New("", "X", "foobar", "R", "foobaz", "Z")
   227  	testCases = append(testCases,
   228  		testCase{blankToX1, "foo", "XfXoXoX"},
   229  		testCase{blankToX1, "", "X"},
   230  
   231  		testCase{blankToX2, "foo", "XfXoXoX"},
   232  		testCase{blankToX2, "", "X"},
   233  
   234  		testCase{blankHighPriority, "oo", "XOXOX"},
   235  		testCase{blankHighPriority, "ii", "XiXiX"},
   236  		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
   237  		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
   238  		testCase{blankHighPriority, "", "X"},
   239  
   240  		testCase{blankLowPriority, "oo", "OOX"},
   241  		testCase{blankLowPriority, "ii", "XiXiX"},
   242  		testCase{blankLowPriority, "oiio", "OXiXiOX"},
   243  		testCase{blankLowPriority, "iooi", "XiOOXiX"},
   244  		testCase{blankLowPriority, "", "X"},
   245  
   246  		testCase{blankNoOp1, "foo", "foo"},
   247  		testCase{blankNoOp1, "", ""},
   248  
   249  		testCase{blankNoOp2, "foo", "foo"},
   250  		testCase{blankNoOp2, "", ""},
   251  
   252  		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
   253  		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
   254  		testCase{blankFoo, "", "X"},
   255  	)
   256  
   257  	// single string replacer
   258  
   259  	abcMatcher := New("abc", "[match]")
   260  
   261  	testCases = append(testCases,
   262  		testCase{abcMatcher, "", ""},
   263  		testCase{abcMatcher, "ab", "ab"},
   264  		testCase{abcMatcher, "abc", "[match]"},
   265  		testCase{abcMatcher, "abcd", "[match]d"},
   266  		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
   267  	)
   268  
   269  	// Issue 6659 cases (more single string replacer)
   270  
   271  	noHello := New("Hello", "")
   272  	testCases = append(testCases,
   273  		testCase{noHello, "Hello", ""},
   274  		testCase{noHello, "Hellox", "x"},
   275  		testCase{noHello, "xHello", "x"},
   276  		testCase{noHello, "xHellox", "xx"},
   277  	)
   278  
   279  	// No-arg test cases.
   280  
   281  	nop := New()
   282  	testCases = append(testCases,
   283  		testCase{nop, "abc", "abc"},
   284  		testCase{nop, "", ""},
   285  	)
   286  
   287  	// Run the test cases.
   288  
   289  	for i, tc := range testCases {
   290  		{
   291  			// Replace with len(in) == cap(in)
   292  			in := make([]byte, len(tc.in))
   293  			copy(in, tc.in)
   294  			if s := string(tc.r.Replace(in)); s != tc.out {
   295  				t.Errorf("%d. Replace(%q /* len == cap */) = %q, want %q", i, tc.in, s, tc.out)
   296  			}
   297  		}
   298  
   299  		{
   300  			// Replace with len(in) < cap(in)
   301  			in := make([]byte, len(tc.in), len(tc.in)*2)
   302  			copy(in, tc.in)
   303  			if s := string(tc.r.Replace(in)); s != tc.out {
   304  				t.Errorf("%d. Replace(%q /* len < cap */) = %q, want %q", i, tc.in, s, tc.out)
   305  			}
   306  		}
   307  	}
   308  }
   309  
   310  func BenchmarkGenericNoMatch(b *testing.B) {
   311  	str := []byte(strings.Repeat("A", 100) + strings.Repeat("B", 100))
   312  	generic := New("a", "A", "b", "B", "12", "123") // varying lengths forces generic
   313  	for i := 0; i < b.N; i++ {
   314  		generic.Replace(str)
   315  	}
   316  }
   317  
   318  func BenchmarkGenericMatch1(b *testing.B) {
   319  	str := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100))
   320  	generic := New("a", "A", "b", "B", "12", "123")
   321  	for i := 0; i < b.N; i++ {
   322  		generic.Replace(str)
   323  	}
   324  }
   325  
   326  func BenchmarkGenericMatch2(b *testing.B) {
   327  	str := bytes.Repeat([]byte("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!"), 100)
   328  	for i := 0; i < b.N; i++ {
   329  		htmlUnescaper.Replace(str)
   330  	}
   331  }
   332  
   333  func benchmarkSingleString(b *testing.B, pattern, text string) {
   334  	r := New(pattern, "[match]")
   335  	buf := make([]byte, len(text), len(text)*7)
   336  	b.SetBytes(int64(len(text)))
   337  	b.ResetTimer()
   338  	for i := 0; i < b.N; i++ {
   339  		copy(buf, text)
   340  		r.Replace(buf)
   341  	}
   342  }
   343  
   344  func BenchmarkSingleMaxSkipping(b *testing.B) {
   345  	benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000))
   346  }
   347  
   348  func BenchmarkSingleLongSuffixFail(b *testing.B) {
   349  	benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002))
   350  }
   351  
   352  func BenchmarkSingleMatch(b *testing.B) {
   353  	benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000))
   354  }
   355  
   356  func benchmarkReplacer(b *testing.B, r *Replacer, str string) {
   357  	buf := make([]byte, len(str))
   358  	b.ResetTimer()
   359  	for i := 0; i < b.N; i++ {
   360  		copy(buf, str)
   361  		r.Replace(buf)
   362  	}
   363  }
   364  
   365  func BenchmarkByteByteNoMatch(b *testing.B) {
   366  	benchmarkReplacer(b, capitalLetters, strings.Repeat("A", 100)+strings.Repeat("B", 100))
   367  }
   368  
   369  func BenchmarkByteByteMatch(b *testing.B) {
   370  	benchmarkReplacer(b, capitalLetters, strings.Repeat("a", 100)+strings.Repeat("b", 100))
   371  }
   372  
   373  func BenchmarkByteStringMatch(b *testing.B) {
   374  	benchmarkReplacer(b, htmlEscaper, "<"+strings.Repeat("a", 99)+strings.Repeat("b", 99)+">")
   375  }
   376  
   377  func BenchmarkHTMLEscapeNew(b *testing.B) {
   378  	benchmarkReplacer(b, htmlEscaper, "I <3 to escape HTML & other text too.")
   379  }
   380  
   381  func BenchmarkHTMLEscapeOld(b *testing.B) {
   382  	str := "I <3 to escape HTML & other text too."
   383  	buf := make([]byte, len(str))
   384  	for i := 0; i < b.N; i++ {
   385  		copy(buf, str)
   386  		oldHTMLEscape(buf)
   387  	}
   388  }
   389  
   390  // The http package's old HTML escaping function in bytes form.
   391  func oldHTMLEscape(s []byte) []byte {
   392  	s = bytes.Replace(s, []byte("&"), []byte("&amp;"), -1)
   393  	s = bytes.Replace(s, []byte("<"), []byte("&lt;"), -1)
   394  	s = bytes.Replace(s, []byte(">"), []byte("&gt;"), -1)
   395  	s = bytes.Replace(s, []byte(`"`), []byte("&quot;"), -1)
   396  	s = bytes.Replace(s, []byte("'"), []byte("&apos;"), -1)
   397  	return s
   398  }
   399  
   400  // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
   401  func BenchmarkByteByteReplaces(b *testing.B) {
   402  	str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
   403  	for i := 0; i < b.N; i++ {
   404  		bytes.Replace(bytes.Replace([]byte(str), []byte{'a'}, []byte{'A'}, -1), []byte{'b'}, []byte{'B'}, -1)
   405  	}
   406  }
   407  
   408  // BenchmarkByteByteMap compares byteByteImpl against Map.
   409  func BenchmarkByteByteMap(b *testing.B) {
   410  	str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
   411  	fn := func(r rune) rune {
   412  		switch r {
   413  		case 'a':
   414  			return 'A'
   415  		case 'b':
   416  			return 'B'
   417  		}
   418  		return r
   419  	}
   420  	for i := 0; i < b.N; i++ {
   421  		bytes.Map(fn, []byte(str))
   422  	}
   423  }