github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/strings/replace_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings_test 6 7 import ( 8 "bytes" 9 "fmt" 10 . "strings" 11 "testing" 12 ) 13 14 var htmlEscaper = NewReplacer( 15 "&", "&", 16 "<", "<", 17 ">", ">", 18 `"`, """, 19 "'", "'", 20 ) 21 22 var htmlUnescaper = NewReplacer( 23 "&", "&", 24 "<", "<", 25 ">", ">", 26 """, `"`, 27 "'", "'", 28 ) 29 30 // The http package's old HTML escaping function. 31 func oldHTMLEscape(s string) string { 32 s = Replace(s, "&", "&", -1) 33 s = Replace(s, "<", "<", -1) 34 s = Replace(s, ">", ">", -1) 35 s = Replace(s, `"`, """, -1) 36 s = Replace(s, "'", "'", -1) 37 return s 38 } 39 40 var capitalLetters = NewReplacer("a", "A", "b", "B") 41 42 // TestReplacer tests the replacer implementations. 43 func TestReplacer(t *testing.T) { 44 type testCase struct { 45 r *Replacer 46 in, out string 47 } 48 var testCases []testCase 49 50 // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. 51 str := func(b byte) string { 52 return string([]byte{b}) 53 } 54 var s []string 55 56 // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". 57 s = nil 58 for i := 0; i < 256; i++ { 59 s = append(s, str(byte(i)), str(byte(i+1))) 60 } 61 inc := NewReplacer(s...) 62 63 // Test cases with 1-byte old strings, 1-byte new strings. 64 testCases = append(testCases, 65 testCase{capitalLetters, "brad", "BrAd"}, 66 testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, 67 testCase{capitalLetters, "", ""}, 68 69 testCase{inc, "brad", "csbe"}, 70 testCase{inc, "\x00\xff", "\x01\x00"}, 71 testCase{inc, "", ""}, 72 73 testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"}, 74 ) 75 76 // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... 77 s = nil 78 for i := 0; i < 256; i++ { 79 n := i + 1 - 'a' 80 if n < 1 { 81 n = 1 82 } 83 s = append(s, str(byte(i)), Repeat(str(byte(i)), n)) 84 } 85 repeat := NewReplacer(s...) 86 87 // Test cases with 1-byte old strings, variable length new strings. 88 testCases = append(testCases, 89 testCase{htmlEscaper, "No changes", "No changes"}, 90 testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, 91 testCase{htmlEscaper, "&&&", "&&&"}, 92 testCase{htmlEscaper, "", ""}, 93 94 testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, 95 testCase{repeat, "abba", "abbbba"}, 96 testCase{repeat, "", ""}, 97 98 testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"}, 99 ) 100 101 // The remaining test cases have variable length old strings. 102 103 testCases = append(testCases, 104 testCase{htmlUnescaper, "&amp;", "&"}, 105 testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"}, 106 testCase{htmlUnescaper, "", ""}, 107 108 testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, 109 110 testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, 111 112 testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, 113 ) 114 115 // gen1 has multiple old strings of variable length. There is no 116 // overall non-empty common prefix, but some pairwise common prefixes. 117 gen1 := NewReplacer( 118 "aaa", "3[aaa]", 119 "aa", "2[aa]", 120 "a", "1[a]", 121 "i", "i", 122 "longerst", "most long", 123 "longer", "medium", 124 "long", "short", 125 "xx", "xx", 126 "x", "X", 127 "X", "Y", 128 "Y", "Z", 129 ) 130 testCases = append(testCases, 131 testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, 132 testCase{gen1, "long, longerst, longer", "short, most long, medium"}, 133 testCase{gen1, "xxxxx", "xxxxX"}, 134 testCase{gen1, "XiX", "YiY"}, 135 testCase{gen1, "", ""}, 136 ) 137 138 // gen2 has multiple old strings with no pairwise common prefix. 139 gen2 := NewReplacer( 140 "roses", "red", 141 "violets", "blue", 142 "sugar", "sweet", 143 ) 144 testCases = append(testCases, 145 testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, 146 testCase{gen2, "", ""}, 147 ) 148 149 // gen3 has multiple old strings with an overall common prefix. 150 gen3 := NewReplacer( 151 "abracadabra", "poof", 152 "abracadabrakazam", "splat", 153 "abraham", "lincoln", 154 "abrasion", "scrape", 155 "abraham", "isaac", 156 ) 157 testCases = append(testCases, 158 testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, 159 testCase{gen3, "abrasion abracad", "scrape abracad"}, 160 testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, 161 testCase{gen3, "", ""}, 162 ) 163 164 // foo{1,2,3,4} have multiple old strings with an overall common prefix 165 // and 1- or 2- byte extensions from the common prefix. 166 foo1 := NewReplacer( 167 "foo1", "A", 168 "foo2", "B", 169 "foo3", "C", 170 ) 171 foo2 := NewReplacer( 172 "foo1", "A", 173 "foo2", "B", 174 "foo31", "C", 175 "foo32", "D", 176 ) 177 foo3 := NewReplacer( 178 "foo11", "A", 179 "foo12", "B", 180 "foo31", "C", 181 "foo32", "D", 182 ) 183 foo4 := NewReplacer( 184 "foo12", "B", 185 "foo32", "D", 186 ) 187 testCases = append(testCases, 188 testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, 189 testCase{foo1, "", ""}, 190 191 testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, 192 testCase{foo2, "", ""}, 193 194 testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, 195 testCase{foo3, "", ""}, 196 197 testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, 198 testCase{foo4, "", ""}, 199 ) 200 201 // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. 202 allBytes := make([]byte, 256) 203 for i := range allBytes { 204 allBytes[i] = byte(i) 205 } 206 allString := string(allBytes) 207 genAll := NewReplacer( 208 allString, "[all]", 209 "\xff", "[ff]", 210 "\x00", "[00]", 211 ) 212 testCases = append(testCases, 213 testCase{genAll, allString, "[all]"}, 214 testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, 215 testCase{genAll, "", ""}, 216 ) 217 218 // Test cases with empty old strings. 219 220 blankToX1 := NewReplacer("", "X") 221 blankToX2 := NewReplacer("", "X", "", "") 222 blankHighPriority := NewReplacer("", "X", "o", "O") 223 blankLowPriority := NewReplacer("o", "O", "", "X") 224 blankNoOp1 := NewReplacer("", "") 225 blankNoOp2 := NewReplacer("", "", "", "A") 226 blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z") 227 testCases = append(testCases, 228 testCase{blankToX1, "foo", "XfXoXoX"}, 229 testCase{blankToX1, "", "X"}, 230 231 testCase{blankToX2, "foo", "XfXoXoX"}, 232 testCase{blankToX2, "", "X"}, 233 234 testCase{blankHighPriority, "oo", "XOXOX"}, 235 testCase{blankHighPriority, "ii", "XiXiX"}, 236 testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, 237 testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, 238 testCase{blankHighPriority, "", "X"}, 239 240 testCase{blankLowPriority, "oo", "OOX"}, 241 testCase{blankLowPriority, "ii", "XiXiX"}, 242 testCase{blankLowPriority, "oiio", "OXiXiOX"}, 243 testCase{blankLowPriority, "iooi", "XiOOXiX"}, 244 testCase{blankLowPriority, "", "X"}, 245 246 testCase{blankNoOp1, "foo", "foo"}, 247 testCase{blankNoOp1, "", ""}, 248 249 testCase{blankNoOp2, "foo", "foo"}, 250 testCase{blankNoOp2, "", ""}, 251 252 testCase{blankFoo, "foobarfoobaz", "XRXZX"}, 253 testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, 254 testCase{blankFoo, "", "X"}, 255 ) 256 257 // single string replacer 258 259 abcMatcher := NewReplacer("abc", "[match]") 260 261 testCases = append(testCases, 262 testCase{abcMatcher, "", ""}, 263 testCase{abcMatcher, "ab", "ab"}, 264 testCase{abcMatcher, "abc", "[match]"}, 265 testCase{abcMatcher, "abcd", "[match]d"}, 266 testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, 267 ) 268 269 // Issue 6659 cases (more single string replacer) 270 271 noHello := NewReplacer("Hello", "") 272 testCases = append(testCases, 273 testCase{noHello, "Hello", ""}, 274 testCase{noHello, "Hellox", "x"}, 275 testCase{noHello, "xHello", "x"}, 276 testCase{noHello, "xHellox", "xx"}, 277 ) 278 279 // No-arg test cases. 280 281 nop := NewReplacer() 282 testCases = append(testCases, 283 testCase{nop, "abc", "abc"}, 284 testCase{nop, "", ""}, 285 ) 286 287 // Run the test cases. 288 289 for i, tc := range testCases { 290 if s := tc.r.Replace(tc.in); s != tc.out { 291 t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out) 292 } 293 var buf bytes.Buffer 294 n, err := tc.r.WriteString(&buf, tc.in) 295 if err != nil { 296 t.Errorf("%d. WriteString: %v", i, err) 297 continue 298 } 299 got := buf.String() 300 if got != tc.out { 301 t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out) 302 continue 303 } 304 if n != len(tc.out) { 305 t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", 306 i, tc.in, n, len(tc.out), tc.out) 307 } 308 } 309 } 310 311 // TestPickAlgorithm tests that NewReplacer picks the correct algorithm. 312 func TestPickAlgorithm(t *testing.T) { 313 testCases := []struct { 314 r *Replacer 315 want string 316 }{ 317 {capitalLetters, "*strings.byteReplacer"}, 318 {htmlEscaper, "*strings.byteStringReplacer"}, 319 {NewReplacer("12", "123"), "*strings.singleStringReplacer"}, 320 {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, 321 {NewReplacer("", "X"), "*strings.genericReplacer"}, 322 {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"}, 323 } 324 for i, tc := range testCases { 325 got := fmt.Sprintf("%T", tc.r.Replacer()) 326 if got != tc.want { 327 t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want) 328 } 329 } 330 } 331 332 // TestGenericTrieBuilding verifies the structure of the generated trie. There 333 // is one node per line, and the key ending with the current line is in the 334 // trie if it ends with a "+". 335 func TestGenericTrieBuilding(t *testing.T) { 336 testCases := []struct{ in, out string }{ 337 {"abc;abdef;abdefgh;xx;xy;z", `- 338 a- 339 .b- 340 ..c+ 341 ..d- 342 ...ef+ 343 .....gh+ 344 x- 345 .x+ 346 .y+ 347 z+ 348 `}, 349 {"abracadabra;abracadabrakazam;abraham;abrasion", `- 350 a- 351 .bra- 352 ....c- 353 .....adabra+ 354 ...........kazam+ 355 ....h- 356 .....am+ 357 ....s- 358 .....ion+ 359 `}, 360 {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- 361 X+ 362 Y+ 363 a+ 364 .a+ 365 ..a+ 366 i+ 367 l- 368 .ong+ 369 ....er+ 370 ......st+ 371 x+ 372 .x+ 373 `}, 374 {"foo;;foo;foo1", `+ 375 f- 376 .oo+ 377 ...1+ 378 `}, 379 } 380 381 for _, tc := range testCases { 382 keys := Split(tc.in, ";") 383 args := make([]string, len(keys)*2) 384 for i, key := range keys { 385 args[i*2] = key 386 } 387 388 got := NewReplacer(args...).PrintTrie() 389 // Remove tabs from tc.out 390 wantbuf := make([]byte, 0, len(tc.out)) 391 for i := 0; i < len(tc.out); i++ { 392 if tc.out[i] != '\t' { 393 wantbuf = append(wantbuf, tc.out[i]) 394 } 395 } 396 want := string(wantbuf) 397 398 if got != want { 399 t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) 400 } 401 } 402 } 403 404 func BenchmarkGenericNoMatch(b *testing.B) { 405 str := Repeat("A", 100) + Repeat("B", 100) 406 generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic 407 for i := 0; i < b.N; i++ { 408 generic.Replace(str) 409 } 410 } 411 412 func BenchmarkGenericMatch1(b *testing.B) { 413 str := Repeat("a", 100) + Repeat("b", 100) 414 generic := NewReplacer("a", "A", "b", "B", "12", "123") 415 for i := 0; i < b.N; i++ { 416 generic.Replace(str) 417 } 418 } 419 420 func BenchmarkGenericMatch2(b *testing.B) { 421 str := Repeat("It's <b>HTML</b>!", 100) 422 for i := 0; i < b.N; i++ { 423 htmlUnescaper.Replace(str) 424 } 425 } 426 427 func benchmarkSingleString(b *testing.B, pattern, text string) { 428 r := NewReplacer(pattern, "[match]") 429 b.SetBytes(int64(len(text))) 430 b.ResetTimer() 431 for i := 0; i < b.N; i++ { 432 r.Replace(text) 433 } 434 } 435 436 func BenchmarkSingleMaxSkipping(b *testing.B) { 437 benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000)) 438 } 439 440 func BenchmarkSingleLongSuffixFail(b *testing.B) { 441 benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002)) 442 } 443 444 func BenchmarkSingleMatch(b *testing.B) { 445 benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000)) 446 } 447 448 func BenchmarkByteByteNoMatch(b *testing.B) { 449 str := Repeat("A", 100) + Repeat("B", 100) 450 for i := 0; i < b.N; i++ { 451 capitalLetters.Replace(str) 452 } 453 } 454 455 func BenchmarkByteByteMatch(b *testing.B) { 456 str := Repeat("a", 100) + Repeat("b", 100) 457 for i := 0; i < b.N; i++ { 458 capitalLetters.Replace(str) 459 } 460 } 461 462 func BenchmarkByteStringMatch(b *testing.B) { 463 str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" 464 for i := 0; i < b.N; i++ { 465 htmlEscaper.Replace(str) 466 } 467 } 468 469 func BenchmarkHTMLEscapeNew(b *testing.B) { 470 str := "I <3 to escape HTML & other text too." 471 for i := 0; i < b.N; i++ { 472 htmlEscaper.Replace(str) 473 } 474 } 475 476 func BenchmarkHTMLEscapeOld(b *testing.B) { 477 str := "I <3 to escape HTML & other text too." 478 for i := 0; i < b.N; i++ { 479 oldHTMLEscape(str) 480 } 481 } 482 483 // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. 484 func BenchmarkByteByteReplaces(b *testing.B) { 485 str := Repeat("a", 100) + Repeat("b", 100) 486 for i := 0; i < b.N; i++ { 487 Replace(Replace(str, "a", "A", -1), "b", "B", -1) 488 } 489 } 490 491 // BenchmarkByteByteMap compares byteByteImpl against Map. 492 func BenchmarkByteByteMap(b *testing.B) { 493 str := Repeat("a", 100) + Repeat("b", 100) 494 fn := func(r rune) rune { 495 switch r { 496 case 'a': 497 return 'A' 498 case 'b': 499 return 'B' 500 } 501 return r 502 } 503 for i := 0; i < b.N; i++ { 504 Map(fn, str) 505 } 506 }