github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/strings/replace_test.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strings_test
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	. "strings"
    11  	"testing"
    12  )
    13  
    14  var htmlEscaper = NewReplacer(
    15  	"&", "&",
    16  	"<", "&lt;",
    17  	">", "&gt;",
    18  	`"`, "&quot;",
    19  	"'", "&apos;",
    20  )
    21  
    22  var htmlUnescaper = NewReplacer(
    23  	"&amp;", "&",
    24  	"&lt;", "<",
    25  	"&gt;", ">",
    26  	"&quot;", `"`,
    27  	"&apos;", "'",
    28  )
    29  
    30  // The http package's old HTML escaping function.
    31  func oldHTMLEscape(s string) string {
    32  	s = Replace(s, "&", "&amp;", -1)
    33  	s = Replace(s, "<", "&lt;", -1)
    34  	s = Replace(s, ">", "&gt;", -1)
    35  	s = Replace(s, `"`, "&quot;", -1)
    36  	s = Replace(s, "'", "&apos;", -1)
    37  	return s
    38  }
    39  
    40  var capitalLetters = NewReplacer("a", "A", "b", "B")
    41  
    42  // TestReplacer tests the replacer implementations.
    43  func TestReplacer(t *testing.T) {
    44  	type testCase struct {
    45  		r       *Replacer
    46  		in, out string
    47  	}
    48  	var testCases []testCase
    49  
    50  	// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
    51  	str := func(b byte) string {
    52  		return string([]byte{b})
    53  	}
    54  	var s []string
    55  
    56  	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
    57  	s = nil
    58  	for i := 0; i < 256; i++ {
    59  		s = append(s, str(byte(i)), str(byte(i+1)))
    60  	}
    61  	inc := NewReplacer(s...)
    62  
    63  	// Test cases with 1-byte old strings, 1-byte new strings.
    64  	testCases = append(testCases,
    65  		testCase{capitalLetters, "brad", "BrAd"},
    66  		testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
    67  		testCase{capitalLetters, "", ""},
    68  
    69  		testCase{inc, "brad", "csbe"},
    70  		testCase{inc, "\x00\xff", "\x01\x00"},
    71  		testCase{inc, "", ""},
    72  
    73  		testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
    74  	)
    75  
    76  	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
    77  	s = nil
    78  	for i := 0; i < 256; i++ {
    79  		n := i + 1 - 'a'
    80  		if n < 1 {
    81  			n = 1
    82  		}
    83  		s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
    84  	}
    85  	repeat := NewReplacer(s...)
    86  
    87  	// Test cases with 1-byte old strings, variable length new strings.
    88  	testCases = append(testCases,
    89  		testCase{htmlEscaper, "No changes", "No changes"},
    90  		testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
    91  		testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
    92  		testCase{htmlEscaper, "", ""},
    93  
    94  		testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
    95  		testCase{repeat, "abba", "abbbba"},
    96  		testCase{repeat, "", ""},
    97  
    98  		testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
    99  	)
   100  
   101  	// The remaining test cases have variable length old strings.
   102  
   103  	testCases = append(testCases,
   104  		testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
   105  		testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
   106  		testCase{htmlUnescaper, "", ""},
   107  
   108  		testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
   109  
   110  		testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
   111  
   112  		testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
   113  	)
   114  
   115  	// gen1 has multiple old strings of variable length. There is no
   116  	// overall non-empty common prefix, but some pairwise common prefixes.
   117  	gen1 := NewReplacer(
   118  		"aaa", "3[aaa]",
   119  		"aa", "2[aa]",
   120  		"a", "1[a]",
   121  		"i", "i",
   122  		"longerst", "most long",
   123  		"longer", "medium",
   124  		"long", "short",
   125  		"xx", "xx",
   126  		"x", "X",
   127  		"X", "Y",
   128  		"Y", "Z",
   129  	)
   130  	testCases = append(testCases,
   131  		testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
   132  		testCase{gen1, "long, longerst, longer", "short, most long, medium"},
   133  		testCase{gen1, "xxxxx", "xxxxX"},
   134  		testCase{gen1, "XiX", "YiY"},
   135  		testCase{gen1, "", ""},
   136  	)
   137  
   138  	// gen2 has multiple old strings with no pairwise common prefix.
   139  	gen2 := NewReplacer(
   140  		"roses", "red",
   141  		"violets", "blue",
   142  		"sugar", "sweet",
   143  	)
   144  	testCases = append(testCases,
   145  		testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
   146  		testCase{gen2, "", ""},
   147  	)
   148  
   149  	// gen3 has multiple old strings with an overall common prefix.
   150  	gen3 := NewReplacer(
   151  		"abracadabra", "poof",
   152  		"abracadabrakazam", "splat",
   153  		"abraham", "lincoln",
   154  		"abrasion", "scrape",
   155  		"abraham", "isaac",
   156  	)
   157  	testCases = append(testCases,
   158  		testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
   159  		testCase{gen3, "abrasion abracad", "scrape abracad"},
   160  		testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
   161  		testCase{gen3, "", ""},
   162  	)
   163  
   164  	// foo{1,2,3,4} have multiple old strings with an overall common prefix
   165  	// and 1- or 2- byte extensions from the common prefix.
   166  	foo1 := NewReplacer(
   167  		"foo1", "A",
   168  		"foo2", "B",
   169  		"foo3", "C",
   170  	)
   171  	foo2 := NewReplacer(
   172  		"foo1", "A",
   173  		"foo2", "B",
   174  		"foo31", "C",
   175  		"foo32", "D",
   176  	)
   177  	foo3 := NewReplacer(
   178  		"foo11", "A",
   179  		"foo12", "B",
   180  		"foo31", "C",
   181  		"foo32", "D",
   182  	)
   183  	foo4 := NewReplacer(
   184  		"foo12", "B",
   185  		"foo32", "D",
   186  	)
   187  	testCases = append(testCases,
   188  		testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
   189  		testCase{foo1, "", ""},
   190  
   191  		testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
   192  		testCase{foo2, "", ""},
   193  
   194  		testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
   195  		testCase{foo3, "", ""},
   196  
   197  		testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
   198  		testCase{foo4, "", ""},
   199  	)
   200  
   201  	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
   202  	allBytes := make([]byte, 256)
   203  	for i := range allBytes {
   204  		allBytes[i] = byte(i)
   205  	}
   206  	allString := string(allBytes)
   207  	genAll := NewReplacer(
   208  		allString, "[all]",
   209  		"\xff", "[ff]",
   210  		"\x00", "[00]",
   211  	)
   212  	testCases = append(testCases,
   213  		testCase{genAll, allString, "[all]"},
   214  		testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
   215  		testCase{genAll, "", ""},
   216  	)
   217  
   218  	// Test cases with empty old strings.
   219  
   220  	blankToX1 := NewReplacer("", "X")
   221  	blankToX2 := NewReplacer("", "X", "", "")
   222  	blankHighPriority := NewReplacer("", "X", "o", "O")
   223  	blankLowPriority := NewReplacer("o", "O", "", "X")
   224  	blankNoOp1 := NewReplacer("", "")
   225  	blankNoOp2 := NewReplacer("", "", "", "A")
   226  	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
   227  	testCases = append(testCases,
   228  		testCase{blankToX1, "foo", "XfXoXoX"},
   229  		testCase{blankToX1, "", "X"},
   230  
   231  		testCase{blankToX2, "foo", "XfXoXoX"},
   232  		testCase{blankToX2, "", "X"},
   233  
   234  		testCase{blankHighPriority, "oo", "XOXOX"},
   235  		testCase{blankHighPriority, "ii", "XiXiX"},
   236  		testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
   237  		testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
   238  		testCase{blankHighPriority, "", "X"},
   239  
   240  		testCase{blankLowPriority, "oo", "OOX"},
   241  		testCase{blankLowPriority, "ii", "XiXiX"},
   242  		testCase{blankLowPriority, "oiio", "OXiXiOX"},
   243  		testCase{blankLowPriority, "iooi", "XiOOXiX"},
   244  		testCase{blankLowPriority, "", "X"},
   245  
   246  		testCase{blankNoOp1, "foo", "foo"},
   247  		testCase{blankNoOp1, "", ""},
   248  
   249  		testCase{blankNoOp2, "foo", "foo"},
   250  		testCase{blankNoOp2, "", ""},
   251  
   252  		testCase{blankFoo, "foobarfoobaz", "XRXZX"},
   253  		testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
   254  		testCase{blankFoo, "", "X"},
   255  	)
   256  
   257  	// single string replacer
   258  
   259  	abcMatcher := NewReplacer("abc", "[match]")
   260  
   261  	testCases = append(testCases,
   262  		testCase{abcMatcher, "", ""},
   263  		testCase{abcMatcher, "ab", "ab"},
   264  		testCase{abcMatcher, "abc", "[match]"},
   265  		testCase{abcMatcher, "abcd", "[match]d"},
   266  		testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
   267  	)
   268  
   269  	// Issue 6659 cases (more single string replacer)
   270  
   271  	noHello := NewReplacer("Hello", "")
   272  	testCases = append(testCases,
   273  		testCase{noHello, "Hello", ""},
   274  		testCase{noHello, "Hellox", "x"},
   275  		testCase{noHello, "xHello", "x"},
   276  		testCase{noHello, "xHellox", "xx"},
   277  	)
   278  
   279  	// No-arg test cases.
   280  
   281  	nop := NewReplacer()
   282  	testCases = append(testCases,
   283  		testCase{nop, "abc", "abc"},
   284  		testCase{nop, "", ""},
   285  	)
   286  
   287  	// Run the test cases.
   288  
   289  	for i, tc := range testCases {
   290  		if s := tc.r.Replace(tc.in); s != tc.out {
   291  			t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
   292  		}
   293  		var buf bytes.Buffer
   294  		n, err := tc.r.WriteString(&buf, tc.in)
   295  		if err != nil {
   296  			t.Errorf("%d. WriteString: %v", i, err)
   297  			continue
   298  		}
   299  		got := buf.String()
   300  		if got != tc.out {
   301  			t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
   302  			continue
   303  		}
   304  		if n != len(tc.out) {
   305  			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
   306  				i, tc.in, n, len(tc.out), tc.out)
   307  		}
   308  	}
   309  }
   310  
   311  // TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
   312  func TestPickAlgorithm(t *testing.T) {
   313  	testCases := []struct {
   314  		r    *Replacer
   315  		want string
   316  	}{
   317  		{capitalLetters, "*strings.byteReplacer"},
   318  		{htmlEscaper, "*strings.byteStringReplacer"},
   319  		{NewReplacer("12", "123"), "*strings.singleStringReplacer"},
   320  		{NewReplacer("1", "12"), "*strings.byteStringReplacer"},
   321  		{NewReplacer("", "X"), "*strings.genericReplacer"},
   322  		{NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
   323  	}
   324  	for i, tc := range testCases {
   325  		got := fmt.Sprintf("%T", tc.r.Replacer())
   326  		if got != tc.want {
   327  			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
   328  		}
   329  	}
   330  }
   331  
   332  // TestGenericTrieBuilding verifies the structure of the generated trie. There
   333  // is one node per line, and the key ending with the current line is in the
   334  // trie if it ends with a "+".
   335  func TestGenericTrieBuilding(t *testing.T) {
   336  	testCases := []struct{ in, out string }{
   337  		{"abc;abdef;abdefgh;xx;xy;z", `-
   338  			a-
   339  			.b-
   340  			..c+
   341  			..d-
   342  			...ef+
   343  			.....gh+
   344  			x-
   345  			.x+
   346  			.y+
   347  			z+
   348  			`},
   349  		{"abracadabra;abracadabrakazam;abraham;abrasion", `-
   350  			a-
   351  			.bra-
   352  			....c-
   353  			.....adabra+
   354  			...........kazam+
   355  			....h-
   356  			.....am+
   357  			....s-
   358  			.....ion+
   359  			`},
   360  		{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
   361  			X+
   362  			Y+
   363  			a+
   364  			.a+
   365  			..a+
   366  			i+
   367  			l-
   368  			.ong+
   369  			....er+
   370  			......st+
   371  			x+
   372  			.x+
   373  			`},
   374  		{"foo;;foo;foo1", `+
   375  			f-
   376  			.oo+
   377  			...1+
   378  			`},
   379  	}
   380  
   381  	for _, tc := range testCases {
   382  		keys := Split(tc.in, ";")
   383  		args := make([]string, len(keys)*2)
   384  		for i, key := range keys {
   385  			args[i*2] = key
   386  		}
   387  
   388  		got := NewReplacer(args...).PrintTrie()
   389  		// Remove tabs from tc.out
   390  		wantbuf := make([]byte, 0, len(tc.out))
   391  		for i := 0; i < len(tc.out); i++ {
   392  			if tc.out[i] != '\t' {
   393  				wantbuf = append(wantbuf, tc.out[i])
   394  			}
   395  		}
   396  		want := string(wantbuf)
   397  
   398  		if got != want {
   399  			t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
   400  		}
   401  	}
   402  }
   403  
   404  func BenchmarkGenericNoMatch(b *testing.B) {
   405  	str := Repeat("A", 100) + Repeat("B", 100)
   406  	generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
   407  	for i := 0; i < b.N; i++ {
   408  		generic.Replace(str)
   409  	}
   410  }
   411  
   412  func BenchmarkGenericMatch1(b *testing.B) {
   413  	str := Repeat("a", 100) + Repeat("b", 100)
   414  	generic := NewReplacer("a", "A", "b", "B", "12", "123")
   415  	for i := 0; i < b.N; i++ {
   416  		generic.Replace(str)
   417  	}
   418  }
   419  
   420  func BenchmarkGenericMatch2(b *testing.B) {
   421  	str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
   422  	for i := 0; i < b.N; i++ {
   423  		htmlUnescaper.Replace(str)
   424  	}
   425  }
   426  
   427  func benchmarkSingleString(b *testing.B, pattern, text string) {
   428  	r := NewReplacer(pattern, "[match]")
   429  	b.SetBytes(int64(len(text)))
   430  	b.ResetTimer()
   431  	for i := 0; i < b.N; i++ {
   432  		r.Replace(text)
   433  	}
   434  }
   435  
   436  func BenchmarkSingleMaxSkipping(b *testing.B) {
   437  	benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
   438  }
   439  
   440  func BenchmarkSingleLongSuffixFail(b *testing.B) {
   441  	benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
   442  }
   443  
   444  func BenchmarkSingleMatch(b *testing.B) {
   445  	benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
   446  }
   447  
   448  func BenchmarkByteByteNoMatch(b *testing.B) {
   449  	str := Repeat("A", 100) + Repeat("B", 100)
   450  	for i := 0; i < b.N; i++ {
   451  		capitalLetters.Replace(str)
   452  	}
   453  }
   454  
   455  func BenchmarkByteByteMatch(b *testing.B) {
   456  	str := Repeat("a", 100) + Repeat("b", 100)
   457  	for i := 0; i < b.N; i++ {
   458  		capitalLetters.Replace(str)
   459  	}
   460  }
   461  
   462  func BenchmarkByteStringMatch(b *testing.B) {
   463  	str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
   464  	for i := 0; i < b.N; i++ {
   465  		htmlEscaper.Replace(str)
   466  	}
   467  }
   468  
   469  func BenchmarkHTMLEscapeNew(b *testing.B) {
   470  	str := "I <3 to escape HTML & other text too."
   471  	for i := 0; i < b.N; i++ {
   472  		htmlEscaper.Replace(str)
   473  	}
   474  }
   475  
   476  func BenchmarkHTMLEscapeOld(b *testing.B) {
   477  	str := "I <3 to escape HTML & other text too."
   478  	for i := 0; i < b.N; i++ {
   479  		oldHTMLEscape(str)
   480  	}
   481  }
   482  
   483  // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
   484  func BenchmarkByteByteReplaces(b *testing.B) {
   485  	str := Repeat("a", 100) + Repeat("b", 100)
   486  	for i := 0; i < b.N; i++ {
   487  		Replace(Replace(str, "a", "A", -1), "b", "B", -1)
   488  	}
   489  }
   490  
   491  // BenchmarkByteByteMap compares byteByteImpl against Map.
   492  func BenchmarkByteByteMap(b *testing.B) {
   493  	str := Repeat("a", 100) + Repeat("b", 100)
   494  	fn := func(r rune) rune {
   495  		switch r {
   496  		case 'a':
   497  			return 'A'
   498  		case 'b':
   499  			return 'B'
   500  		}
   501  		return r
   502  	}
   503  	for i := 0; i < b.N; i++ {
   504  		Map(fn, str)
   505  	}
   506  }