github.com/ltltlt/go-source-code@v0.0.0-20190830023027-95be009773aa/strings/strings_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strings_test 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "math/rand" 12 "reflect" 13 . "strings" 14 "testing" 15 "unicode" 16 "unicode/utf8" 17 "unsafe" 18 ) 19 20 func eq(a, b []string) bool { 21 if len(a) != len(b) { 22 return false 23 } 24 for i := 0; i < len(a); i++ { 25 if a[i] != b[i] { 26 return false 27 } 28 } 29 return true 30 } 31 32 var abcd = "abcd" 33 var faces = "☺☻☹" 34 var commas = "1,2,3,4" 35 var dots = "1....2....3....4" 36 37 type IndexTest struct { 38 s string 39 sep string 40 out int 41 } 42 43 var indexTests = []IndexTest{ 44 {"", "", 0}, 45 {"", "a", -1}, 46 {"", "foo", -1}, 47 {"fo", "foo", -1}, 48 {"foo", "foo", 0}, 49 {"oofofoofooo", "f", 2}, 50 {"oofofoofooo", "foo", 4}, 51 {"barfoobarfoo", "foo", 3}, 52 {"foo", "", 0}, 53 {"foo", "o", 1}, 54 {"abcABCabc", "A", 3}, 55 // cases with one byte strings - test special case in Index() 56 {"", "a", -1}, 57 {"x", "a", -1}, 58 {"x", "x", 0}, 59 {"abc", "a", 0}, 60 {"abc", "b", 1}, 61 {"abc", "c", 2}, 62 {"abc", "x", -1}, 63 // test special cases in Index() for short strings 64 {"", "ab", -1}, 65 {"bc", "ab", -1}, 66 {"ab", "ab", 0}, 67 {"xab", "ab", 1}, 68 {"xab"[:2], "ab", -1}, 69 {"", "abc", -1}, 70 {"xbc", "abc", -1}, 71 {"abc", "abc", 0}, 72 {"xabc", "abc", 1}, 73 {"xabc"[:3], "abc", -1}, 74 {"xabxc", "abc", -1}, 75 {"", "abcd", -1}, 76 {"xbcd", "abcd", -1}, 77 {"abcd", "abcd", 0}, 78 {"xabcd", "abcd", 1}, 79 {"xyabcd"[:5], "abcd", -1}, 80 {"xbcqq", "abcqq", -1}, 81 {"abcqq", "abcqq", 0}, 82 {"xabcqq", "abcqq", 1}, 83 {"xyabcqq"[:6], "abcqq", -1}, 84 {"xabxcqq", "abcqq", -1}, 85 {"xabcqxq", "abcqq", -1}, 86 {"", "01234567", -1}, 87 {"32145678", "01234567", -1}, 88 {"01234567", "01234567", 0}, 89 {"x01234567", "01234567", 1}, 90 {"x0123456x01234567", "01234567", 9}, 91 {"xx01234567"[:9], "01234567", -1}, 92 {"", "0123456789", -1}, 93 {"3214567844", "0123456789", -1}, 94 {"0123456789", "0123456789", 0}, 95 {"x0123456789", "0123456789", 1}, 96 {"x012345678x0123456789", "0123456789", 11}, 97 {"xyz0123456789"[:12], "0123456789", -1}, 98 {"x01234567x89", "0123456789", -1}, 99 {"", "0123456789012345", -1}, 100 {"3214567889012345", "0123456789012345", -1}, 101 {"0123456789012345", "0123456789012345", 0}, 102 {"x0123456789012345", "0123456789012345", 1}, 103 {"x012345678901234x0123456789012345", "0123456789012345", 17}, 104 {"", "01234567890123456789", -1}, 105 {"32145678890123456789", "01234567890123456789", -1}, 106 {"01234567890123456789", "01234567890123456789", 0}, 107 {"x01234567890123456789", "01234567890123456789", 1}, 108 {"x0123456789012345678x01234567890123456789", "01234567890123456789", 21}, 109 {"xyz01234567890123456789"[:22], "01234567890123456789", -1}, 110 {"", "0123456789012345678901234567890", -1}, 111 {"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1}, 112 {"0123456789012345678901234567890", "0123456789012345678901234567890", 0}, 113 {"x0123456789012345678901234567890", "0123456789012345678901234567890", 1}, 114 {"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32}, 115 {"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1}, 116 {"", "01234567890123456789012345678901", -1}, 117 {"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1}, 118 {"01234567890123456789012345678901", "01234567890123456789012345678901", 0}, 119 {"x01234567890123456789012345678901", "01234567890123456789012345678901", 1}, 120 {"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33}, 121 {"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1}, 122 {"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6}, 123 {"", "0123456789012345678901234567890123456789", -1}, 124 {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2}, 125 {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1}, 126 {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1}, 127 {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65}, 128 // test fallback to Rabin-Karp. 129 {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, 130 {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, 131 } 132 133 var lastIndexTests = []IndexTest{ 134 {"", "", 0}, 135 {"", "a", -1}, 136 {"", "foo", -1}, 137 {"fo", "foo", -1}, 138 {"foo", "foo", 0}, 139 {"foo", "f", 0}, 140 {"oofofoofooo", "f", 7}, 141 {"oofofoofooo", "foo", 7}, 142 {"barfoobarfoo", "foo", 9}, 143 {"foo", "", 3}, 144 {"foo", "o", 2}, 145 {"abcABCabc", "A", 3}, 146 {"abcABCabc", "a", 6}, 147 } 148 149 var indexAnyTests = []IndexTest{ 150 {"", "", -1}, 151 {"", "a", -1}, 152 {"", "abc", -1}, 153 {"a", "", -1}, 154 {"a", "a", 0}, 155 {"aaa", "a", 0}, 156 {"abc", "xyz", -1}, 157 {"abc", "xcz", 2}, 158 {"ab☺c", "x☺yz", 2}, 159 {"a☺b☻c☹d", "cx", len("a☺b☻")}, 160 {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")}, 161 {"aRegExp*", ".(|)*+?^$[]", 7}, 162 {dots + dots + dots, " ", -1}, 163 {"012abcba210", "\xffb", 4}, 164 {"012\x80bcb\x80210", "\xffb", 3}, 165 } 166 167 var lastIndexAnyTests = []IndexTest{ 168 {"", "", -1}, 169 {"", "a", -1}, 170 {"", "abc", -1}, 171 {"a", "", -1}, 172 {"a", "a", 0}, 173 {"aaa", "a", 2}, 174 {"abc", "xyz", -1}, 175 {"abc", "ab", 1}, 176 {"ab☺c", "x☺yz", 2}, 177 {"a☺b☻c☹d", "cx", len("a☺b☻")}, 178 {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")}, 179 {"a.RegExp*", ".(|)*+?^$[]", 8}, 180 {dots + dots + dots, " ", -1}, 181 {"012abcba210", "\xffb", 6}, 182 {"012\x80bcb\x80210", "\xffb", 7}, 183 } 184 185 // Execute f on each test case. funcName should be the name of f; it's used 186 // in failure reports. 187 func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) { 188 for _, test := range testCases { 189 actual := f(test.s, test.sep) 190 if actual != test.out { 191 t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out) 192 } 193 } 194 } 195 196 func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } 197 func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } 198 func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } 199 func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) } 200 201 func TestLastIndexByte(t *testing.T) { 202 testCases := []IndexTest{ 203 {"", "q", -1}, 204 {"abcdef", "q", -1}, 205 {"abcdefabcdef", "a", len("abcdef")}, // something in the middle 206 {"abcdefabcdef", "f", len("abcdefabcde")}, // last byte 207 {"zabcdefabcdef", "z", 0}, // first byte 208 {"a☺b☻c☹d", "b", len("a☺")}, // non-ascii 209 } 210 for _, test := range testCases { 211 actual := LastIndexByte(test.s, test.sep[0]) 212 if actual != test.out { 213 t.Errorf("LastIndexByte(%q,%c) = %v; want %v", test.s, test.sep[0], actual, test.out) 214 } 215 } 216 } 217 218 func simpleIndex(s, sep string) int { 219 n := len(sep) 220 for i := n; i <= len(s); i++ { 221 if s[i-n:i] == sep { 222 return i - n 223 } 224 } 225 return -1 226 } 227 228 func TestIndexRandom(t *testing.T) { 229 const chars = "abcdefghijklmnopqrstuvwxyz0123456789" 230 for times := 0; times < 10; times++ { 231 for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary 232 s1 := make([]byte, strLen) 233 for i := range s1 { 234 s1[i] = chars[rand.Intn(len(chars))] 235 } 236 s := string(s1) 237 for i := 0; i < 50; i++ { 238 begin := rand.Intn(len(s) + 1) 239 end := begin + rand.Intn(len(s)+1-begin) 240 sep := s[begin:end] 241 if i%4 == 0 { 242 pos := rand.Intn(len(sep) + 1) 243 sep = sep[:pos] + "A" + sep[pos:] 244 } 245 want := simpleIndex(s, sep) 246 res := Index(s, sep) 247 if res != want { 248 t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want) 249 } 250 } 251 } 252 } 253 } 254 255 func TestIndexRune(t *testing.T) { 256 tests := []struct { 257 in string 258 rune rune 259 want int 260 }{ 261 {"", 'a', -1}, 262 {"", '☺', -1}, 263 {"foo", '☹', -1}, 264 {"foo", 'o', 1}, 265 {"foo☺bar", '☺', 3}, 266 {"foo☺☻☹bar", '☹', 9}, 267 {"a A x", 'A', 2}, 268 {"some_text=some_value", '=', 9}, 269 {"☺a", 'a', 3}, 270 {"a☻☺b", '☺', 4}, 271 272 // RuneError should match any invalid UTF-8 byte sequence. 273 {"�", '�', 0}, 274 {"\xff", '�', 0}, 275 {"☻x�", '�', len("☻x")}, 276 {"☻x\xe2\x98", '�', len("☻x")}, 277 {"☻x\xe2\x98�", '�', len("☻x")}, 278 {"☻x\xe2\x98x", '�', len("☻x")}, 279 280 // Invalid rune values should never match. 281 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1}, 282 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair 283 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1}, 284 } 285 for _, tt := range tests { 286 if got := IndexRune(tt.in, tt.rune); got != tt.want { 287 t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want) 288 } 289 } 290 291 haystack := "test世界" 292 allocs := testing.AllocsPerRun(1000, func() { 293 if i := IndexRune(haystack, 's'); i != 2 { 294 t.Fatalf("'s' at %d; want 2", i) 295 } 296 if i := IndexRune(haystack, '世'); i != 4 { 297 t.Fatalf("'世' at %d; want 4", i) 298 } 299 }) 300 if allocs != 0 && testing.CoverMode() == "" { 301 t.Errorf("expected no allocations, got %f", allocs) 302 } 303 } 304 305 const benchmarkString = "some_text=some☺value" 306 307 func BenchmarkIndexRune(b *testing.B) { 308 if got := IndexRune(benchmarkString, '☺'); got != 14 { 309 b.Fatalf("wrong index: expected 14, got=%d", got) 310 } 311 for i := 0; i < b.N; i++ { 312 IndexRune(benchmarkString, '☺') 313 } 314 } 315 316 var benchmarkLongString = Repeat(" ", 100) + benchmarkString 317 318 func BenchmarkIndexRuneLongString(b *testing.B) { 319 if got := IndexRune(benchmarkLongString, '☺'); got != 114 { 320 b.Fatalf("wrong index: expected 114, got=%d", got) 321 } 322 for i := 0; i < b.N; i++ { 323 IndexRune(benchmarkLongString, '☺') 324 } 325 } 326 327 func BenchmarkIndexRuneFastPath(b *testing.B) { 328 if got := IndexRune(benchmarkString, 'v'); got != 17 { 329 b.Fatalf("wrong index: expected 17, got=%d", got) 330 } 331 for i := 0; i < b.N; i++ { 332 IndexRune(benchmarkString, 'v') 333 } 334 } 335 336 func BenchmarkIndex(b *testing.B) { 337 if got := Index(benchmarkString, "v"); got != 17 { 338 b.Fatalf("wrong index: expected 17, got=%d", got) 339 } 340 for i := 0; i < b.N; i++ { 341 Index(benchmarkString, "v") 342 } 343 } 344 345 func BenchmarkLastIndex(b *testing.B) { 346 if got := Index(benchmarkString, "v"); got != 17 { 347 b.Fatalf("wrong index: expected 17, got=%d", got) 348 } 349 for i := 0; i < b.N; i++ { 350 LastIndex(benchmarkString, "v") 351 } 352 } 353 354 func BenchmarkIndexByte(b *testing.B) { 355 if got := IndexByte(benchmarkString, 'v'); got != 17 { 356 b.Fatalf("wrong index: expected 17, got=%d", got) 357 } 358 for i := 0; i < b.N; i++ { 359 IndexByte(benchmarkString, 'v') 360 } 361 } 362 363 type SplitTest struct { 364 s string 365 sep string 366 n int 367 a []string 368 } 369 370 var splittests = []SplitTest{ 371 {"", "", -1, []string{}}, 372 {abcd, "", 2, []string{"a", "bcd"}}, 373 {abcd, "", 4, []string{"a", "b", "c", "d"}}, 374 {abcd, "", -1, []string{"a", "b", "c", "d"}}, 375 {faces, "", -1, []string{"☺", "☻", "☹"}}, 376 {faces, "", 3, []string{"☺", "☻", "☹"}}, 377 {faces, "", 17, []string{"☺", "☻", "☹"}}, 378 {"☺�☹", "", -1, []string{"☺", "�", "☹"}}, 379 {abcd, "a", 0, nil}, 380 {abcd, "a", -1, []string{"", "bcd"}}, 381 {abcd, "z", -1, []string{"abcd"}}, 382 {commas, ",", -1, []string{"1", "2", "3", "4"}}, 383 {dots, "...", -1, []string{"1", ".2", ".3", ".4"}}, 384 {faces, "☹", -1, []string{"☺☻", ""}}, 385 {faces, "~", -1, []string{faces}}, 386 {"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}}, 387 {"1 2", " ", 3, []string{"1", "2"}}, 388 } 389 390 func TestSplit(t *testing.T) { 391 for _, tt := range splittests { 392 a := SplitN(tt.s, tt.sep, tt.n) 393 if !eq(a, tt.a) { 394 t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a) 395 continue 396 } 397 if tt.n == 0 { 398 continue 399 } 400 s := Join(a, tt.sep) 401 if s != tt.s { 402 t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s) 403 } 404 if tt.n < 0 { 405 b := Split(tt.s, tt.sep) 406 if !reflect.DeepEqual(a, b) { 407 t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) 408 } 409 } 410 } 411 } 412 413 var splitaftertests = []SplitTest{ 414 {abcd, "a", -1, []string{"a", "bcd"}}, 415 {abcd, "z", -1, []string{"abcd"}}, 416 {abcd, "", -1, []string{"a", "b", "c", "d"}}, 417 {commas, ",", -1, []string{"1,", "2,", "3,", "4"}}, 418 {dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}}, 419 {faces, "☹", -1, []string{"☺☻☹", ""}}, 420 {faces, "~", -1, []string{faces}}, 421 {faces, "", -1, []string{"☺", "☻", "☹"}}, 422 {"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}}, 423 {"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}}, 424 {"1 2", " ", 3, []string{"1 ", "2"}}, 425 {"123", "", 2, []string{"1", "23"}}, 426 {"123", "", 17, []string{"1", "2", "3"}}, 427 } 428 429 func TestSplitAfter(t *testing.T) { 430 for _, tt := range splitaftertests { 431 a := SplitAfterN(tt.s, tt.sep, tt.n) 432 if !eq(a, tt.a) { 433 t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a) 434 continue 435 } 436 s := Join(a, "") 437 if s != tt.s { 438 t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s) 439 } 440 if tt.n < 0 { 441 b := SplitAfter(tt.s, tt.sep) 442 if !reflect.DeepEqual(a, b) { 443 t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) 444 } 445 } 446 } 447 } 448 449 type FieldsTest struct { 450 s string 451 a []string 452 } 453 454 var fieldstests = []FieldsTest{ 455 {"", []string{}}, 456 {" ", []string{}}, 457 {" \t ", []string{}}, 458 {"\u2000", []string{}}, 459 {" abc ", []string{"abc"}}, 460 {"1 2 3 4", []string{"1", "2", "3", "4"}}, 461 {"1 2 3 4", []string{"1", "2", "3", "4"}}, 462 {"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}}, 463 {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}}, 464 {"\u2000\u2001\u2002", []string{}}, 465 {"\n™\t™\n", []string{"™", "™"}}, 466 {"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}}, 467 {"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}}, 468 {"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}}, 469 {faces, []string{faces}}, 470 } 471 472 func TestFields(t *testing.T) { 473 for _, tt := range fieldstests { 474 a := Fields(tt.s) 475 if !eq(a, tt.a) { 476 t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a) 477 continue 478 } 479 } 480 } 481 482 var FieldsFuncTests = []FieldsTest{ 483 {"", []string{}}, 484 {"XX", []string{}}, 485 {"XXhiXXX", []string{"hi"}}, 486 {"aXXbXXXcX", []string{"a", "b", "c"}}, 487 } 488 489 func TestFieldsFunc(t *testing.T) { 490 for _, tt := range fieldstests { 491 a := FieldsFunc(tt.s, unicode.IsSpace) 492 if !eq(a, tt.a) { 493 t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a) 494 continue 495 } 496 } 497 pred := func(c rune) bool { return c == 'X' } 498 for _, tt := range FieldsFuncTests { 499 a := FieldsFunc(tt.s, pred) 500 if !eq(a, tt.a) { 501 t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a) 502 } 503 } 504 } 505 506 // Test case for any function which accepts and returns a single string. 507 type StringTest struct { 508 in, out string 509 } 510 511 // Execute f on each test case. funcName should be the name of f; it's used 512 // in failure reports. 513 func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) { 514 for _, tc := range testCases { 515 actual := f(tc.in) 516 if actual != tc.out { 517 t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out) 518 } 519 } 520 } 521 522 var upperTests = []StringTest{ 523 {"", ""}, 524 {"ONLYUPPER", "ONLYUPPER"}, 525 {"abc", "ABC"}, 526 {"AbC123", "ABC123"}, 527 {"azAZ09_", "AZAZ09_"}, 528 {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"}, 529 {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"}, 530 {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char 531 {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune 532 } 533 534 var lowerTests = []StringTest{ 535 {"", ""}, 536 {"abc", "abc"}, 537 {"AbC123", "abc123"}, 538 {"azAZ09_", "azaz09_"}, 539 {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"}, 540 {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"}, 541 {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char 542 {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune 543 } 544 545 const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000" 546 547 var trimSpaceTests = []StringTest{ 548 {"", ""}, 549 {"abc", "abc"}, 550 {space + "abc" + space, "abc"}, 551 {" ", ""}, 552 {" \t\r\n \t\t\r\r\n\n ", ""}, 553 {" \t\r\n x\t\t\r\r\n\n ", "x"}, 554 {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"}, 555 {"1 \t\r\n2", "1 \t\r\n2"}, 556 {" x\x80", "x\x80"}, 557 {" x\xc0", "x\xc0"}, 558 {"x \xc0\xc0 ", "x \xc0\xc0"}, 559 {"x \xc0", "x \xc0"}, 560 {"x \xc0 ", "x \xc0"}, 561 {"x \xc0\xc0 ", "x \xc0\xc0"}, 562 {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"}, 563 {"x ☺ ", "x ☺"}, 564 } 565 566 func tenRunes(ch rune) string { 567 r := make([]rune, 10) 568 for i := range r { 569 r[i] = ch 570 } 571 return string(r) 572 } 573 574 // User-defined self-inverse mapping function 575 func rot13(r rune) rune { 576 step := rune(13) 577 if r >= 'a' && r <= 'z' { 578 return ((r - 'a' + step) % 26) + 'a' 579 } 580 if r >= 'A' && r <= 'Z' { 581 return ((r - 'A' + step) % 26) + 'A' 582 } 583 return r 584 } 585 586 func TestMap(t *testing.T) { 587 // Run a couple of awful growth/shrinkage tests 588 a := tenRunes('a') 589 // 1. Grow. This triggers two reallocations in Map. 590 maxRune := func(rune) rune { return unicode.MaxRune } 591 m := Map(maxRune, a) 592 expect := tenRunes(unicode.MaxRune) 593 if m != expect { 594 t.Errorf("growing: expected %q got %q", expect, m) 595 } 596 597 // 2. Shrink 598 minRune := func(rune) rune { return 'a' } 599 m = Map(minRune, tenRunes(unicode.MaxRune)) 600 expect = a 601 if m != expect { 602 t.Errorf("shrinking: expected %q got %q", expect, m) 603 } 604 605 // 3. Rot13 606 m = Map(rot13, "a to zed") 607 expect = "n gb mrq" 608 if m != expect { 609 t.Errorf("rot13: expected %q got %q", expect, m) 610 } 611 612 // 4. Rot13^2 613 m = Map(rot13, Map(rot13, "a to zed")) 614 expect = "a to zed" 615 if m != expect { 616 t.Errorf("rot13: expected %q got %q", expect, m) 617 } 618 619 // 5. Drop 620 dropNotLatin := func(r rune) rune { 621 if unicode.Is(unicode.Latin, r) { 622 return r 623 } 624 return -1 625 } 626 m = Map(dropNotLatin, "Hello, 세계") 627 expect = "Hello" 628 if m != expect { 629 t.Errorf("drop: expected %q got %q", expect, m) 630 } 631 632 // 6. Identity 633 identity := func(r rune) rune { 634 return r 635 } 636 orig := "Input string that we expect not to be copied." 637 m = Map(identity, orig) 638 if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data != 639 (*reflect.StringHeader)(unsafe.Pointer(&m)).Data { 640 t.Error("unexpected copy during identity map") 641 } 642 643 // 7. Handle invalid UTF-8 sequence 644 replaceNotLatin := func(r rune) rune { 645 if unicode.Is(unicode.Latin, r) { 646 return r 647 } 648 return '?' 649 } 650 m = Map(replaceNotLatin, "Hello\255World") 651 expect = "Hello?World" 652 if m != expect { 653 t.Errorf("replace invalid sequence: expected %q got %q", expect, m) 654 } 655 656 // 8. Check utf8.RuneSelf and utf8.MaxRune encoding 657 encode := func(r rune) rune { 658 switch r { 659 case utf8.RuneSelf: 660 return unicode.MaxRune 661 case unicode.MaxRune: 662 return utf8.RuneSelf 663 } 664 return r 665 } 666 s := string(utf8.RuneSelf) + string(utf8.MaxRune) 667 r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s 668 m = Map(encode, s) 669 if m != r { 670 t.Errorf("encoding not handled correctly: expected %q got %q", r, m) 671 } 672 m = Map(encode, r) 673 if m != s { 674 t.Errorf("encoding not handled correctly: expected %q got %q", s, m) 675 } 676 } 677 678 func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } 679 680 func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) } 681 682 func BenchmarkToUpper(b *testing.B) { 683 for _, tc := range upperTests { 684 b.Run(tc.in, func(b *testing.B) { 685 for i := 0; i < b.N; i++ { 686 actual := ToUpper(tc.in) 687 if actual != tc.out { 688 b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out) 689 } 690 } 691 }) 692 } 693 } 694 695 func BenchmarkToLower(b *testing.B) { 696 for _, tc := range lowerTests { 697 b.Run(tc.in, func(b *testing.B) { 698 for i := 0; i < b.N; i++ { 699 actual := ToLower(tc.in) 700 if actual != tc.out { 701 b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out) 702 } 703 } 704 }) 705 } 706 } 707 708 func BenchmarkMapNoChanges(b *testing.B) { 709 identity := func(r rune) rune { 710 return r 711 } 712 for i := 0; i < b.N; i++ { 713 Map(identity, "Some string that won't be modified.") 714 } 715 } 716 717 func TestSpecialCase(t *testing.T) { 718 lower := "abcçdefgğhıijklmnoöprsştuüvyz" 719 upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ" 720 u := ToUpperSpecial(unicode.TurkishCase, upper) 721 if u != upper { 722 t.Errorf("Upper(upper) is %s not %s", u, upper) 723 } 724 u = ToUpperSpecial(unicode.TurkishCase, lower) 725 if u != upper { 726 t.Errorf("Upper(lower) is %s not %s", u, upper) 727 } 728 l := ToLowerSpecial(unicode.TurkishCase, lower) 729 if l != lower { 730 t.Errorf("Lower(lower) is %s not %s", l, lower) 731 } 732 l = ToLowerSpecial(unicode.TurkishCase, upper) 733 if l != lower { 734 t.Errorf("Lower(upper) is %s not %s", l, lower) 735 } 736 } 737 738 func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) } 739 740 var trimTests = []struct { 741 f string 742 in, arg, out string 743 }{ 744 {"Trim", "abba", "a", "bb"}, 745 {"Trim", "abba", "ab", ""}, 746 {"TrimLeft", "abba", "ab", ""}, 747 {"TrimRight", "abba", "ab", ""}, 748 {"TrimLeft", "abba", "a", "bba"}, 749 {"TrimRight", "abba", "a", "abb"}, 750 {"Trim", "<tag>", "<>", "tag"}, 751 {"Trim", "* listitem", " *", "listitem"}, 752 {"Trim", `"quote"`, `"`, "quote"}, 753 {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"}, 754 {"Trim", "\x80test\xff", "\xff", "test"}, 755 {"Trim", " Ġ ", " ", "Ġ"}, 756 {"Trim", " Ġİ0", "0 ", "Ġİ"}, 757 //empty string tests 758 {"Trim", "abba", "", "abba"}, 759 {"Trim", "", "123", ""}, 760 {"Trim", "", "", ""}, 761 {"TrimLeft", "abba", "", "abba"}, 762 {"TrimLeft", "", "123", ""}, 763 {"TrimLeft", "", "", ""}, 764 {"TrimRight", "abba", "", "abba"}, 765 {"TrimRight", "", "123", ""}, 766 {"TrimRight", "", "", ""}, 767 {"TrimRight", "☺\xc0", "☺", "☺\xc0"}, 768 {"TrimPrefix", "aabb", "a", "abb"}, 769 {"TrimPrefix", "aabb", "b", "aabb"}, 770 {"TrimSuffix", "aabb", "a", "aabb"}, 771 {"TrimSuffix", "aabb", "b", "aab"}, 772 } 773 774 func TestTrim(t *testing.T) { 775 for _, tc := range trimTests { 776 name := tc.f 777 var f func(string, string) string 778 switch name { 779 case "Trim": 780 f = Trim 781 case "TrimLeft": 782 f = TrimLeft 783 case "TrimRight": 784 f = TrimRight 785 case "TrimPrefix": 786 f = TrimPrefix 787 case "TrimSuffix": 788 f = TrimSuffix 789 default: 790 t.Errorf("Undefined trim function %s", name) 791 } 792 actual := f(tc.in, tc.arg) 793 if actual != tc.out { 794 t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out) 795 } 796 } 797 } 798 799 func BenchmarkTrim(b *testing.B) { 800 b.ReportAllocs() 801 802 for i := 0; i < b.N; i++ { 803 for _, tc := range trimTests { 804 name := tc.f 805 var f func(string, string) string 806 switch name { 807 case "Trim": 808 f = Trim 809 case "TrimLeft": 810 f = TrimLeft 811 case "TrimRight": 812 f = TrimRight 813 case "TrimPrefix": 814 f = TrimPrefix 815 case "TrimSuffix": 816 f = TrimSuffix 817 default: 818 b.Errorf("Undefined trim function %s", name) 819 } 820 actual := f(tc.in, tc.arg) 821 if actual != tc.out { 822 b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out) 823 } 824 } 825 } 826 } 827 828 type predicate struct { 829 f func(rune) bool 830 name string 831 } 832 833 var isSpace = predicate{unicode.IsSpace, "IsSpace"} 834 var isDigit = predicate{unicode.IsDigit, "IsDigit"} 835 var isUpper = predicate{unicode.IsUpper, "IsUpper"} 836 var isValidRune = predicate{ 837 func(r rune) bool { 838 return r != utf8.RuneError 839 }, 840 "IsValidRune", 841 } 842 843 func not(p predicate) predicate { 844 return predicate{ 845 func(r rune) bool { 846 return !p.f(r) 847 }, 848 "not " + p.name, 849 } 850 } 851 852 var trimFuncTests = []struct { 853 f predicate 854 in, out string 855 }{ 856 {isSpace, space + " hello " + space, "hello"}, 857 {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"}, 858 {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"}, 859 {not(isSpace), "hello" + space + "hello", space}, 860 {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"}, 861 {isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"}, 862 {not(isValidRune), "\xc0a\xc0", "a"}, 863 } 864 865 func TestTrimFunc(t *testing.T) { 866 for _, tc := range trimFuncTests { 867 actual := TrimFunc(tc.in, tc.f.f) 868 if actual != tc.out { 869 t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out) 870 } 871 } 872 } 873 874 var indexFuncTests = []struct { 875 in string 876 f predicate 877 first, last int 878 }{ 879 {"", isValidRune, -1, -1}, 880 {"abc", isDigit, -1, -1}, 881 {"0123", isDigit, 0, 3}, 882 {"a1b", isDigit, 1, 1}, 883 {space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes 884 {"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18}, 885 {"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34}, 886 {"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12}, 887 888 // tests of invalid UTF-8 889 {"\x801", isDigit, 1, 1}, 890 {"\x80abc", isDigit, -1, -1}, 891 {"\xc0a\xc0", isValidRune, 1, 1}, 892 {"\xc0a\xc0", not(isValidRune), 0, 2}, 893 {"\xc0☺\xc0", not(isValidRune), 0, 4}, 894 {"\xc0☺\xc0\xc0", not(isValidRune), 0, 5}, 895 {"ab\xc0a\xc0cd", not(isValidRune), 2, 4}, 896 {"a\xe0\x80cd", not(isValidRune), 1, 2}, 897 {"\x80\x80\x80\x80", not(isValidRune), 0, 3}, 898 } 899 900 func TestIndexFunc(t *testing.T) { 901 for _, tc := range indexFuncTests { 902 first := IndexFunc(tc.in, tc.f.f) 903 if first != tc.first { 904 t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first) 905 } 906 last := LastIndexFunc(tc.in, tc.f.f) 907 if last != tc.last { 908 t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last) 909 } 910 } 911 } 912 913 func equal(m string, s1, s2 string, t *testing.T) bool { 914 if s1 == s2 { 915 return true 916 } 917 e1 := Split(s1, "") 918 e2 := Split(s2, "") 919 for i, c1 := range e1 { 920 if i >= len(e2) { 921 break 922 } 923 r1, _ := utf8.DecodeRuneInString(c1) 924 r2, _ := utf8.DecodeRuneInString(e2[i]) 925 if r1 != r2 { 926 t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2) 927 } 928 } 929 return false 930 } 931 932 func TestCaseConsistency(t *testing.T) { 933 // Make a string of all the runes. 934 numRunes := int(unicode.MaxRune + 1) 935 if testing.Short() { 936 numRunes = 1000 937 } 938 a := make([]rune, numRunes) 939 for i := range a { 940 a[i] = rune(i) 941 } 942 s := string(a) 943 // convert the cases. 944 upper := ToUpper(s) 945 lower := ToLower(s) 946 947 // Consistency checks 948 if n := utf8.RuneCountInString(upper); n != numRunes { 949 t.Error("rune count wrong in upper:", n) 950 } 951 if n := utf8.RuneCountInString(lower); n != numRunes { 952 t.Error("rune count wrong in lower:", n) 953 } 954 if !equal("ToUpper(upper)", ToUpper(upper), upper, t) { 955 t.Error("ToUpper(upper) consistency fail") 956 } 957 if !equal("ToLower(lower)", ToLower(lower), lower, t) { 958 t.Error("ToLower(lower) consistency fail") 959 } 960 /* 961 These fail because of non-one-to-oneness of the data, such as multiple 962 upper case 'I' mapping to 'i'. We comment them out but keep them for 963 interest. 964 For instance: CAPITAL LETTER I WITH DOT ABOVE: 965 unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130' 966 967 if !equal("ToUpper(lower)", ToUpper(lower), upper, t) { 968 t.Error("ToUpper(lower) consistency fail"); 969 } 970 if !equal("ToLower(upper)", ToLower(upper), lower, t) { 971 t.Error("ToLower(upper) consistency fail"); 972 } 973 */ 974 } 975 976 var RepeatTests = []struct { 977 in, out string 978 count int 979 }{ 980 {"", "", 0}, 981 {"", "", 1}, 982 {"", "", 2}, 983 {"-", "", 0}, 984 {"-", "-", 1}, 985 {"-", "----------", 10}, 986 {"abc ", "abc abc abc ", 3}, 987 } 988 989 func TestRepeat(t *testing.T) { 990 for _, tt := range RepeatTests { 991 a := Repeat(tt.in, tt.count) 992 if !equal("Repeat(s)", a, tt.out, t) { 993 t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out) 994 continue 995 } 996 } 997 } 998 999 func repeat(s string, count int) (err error) { 1000 defer func() { 1001 if r := recover(); r != nil { 1002 switch v := r.(type) { 1003 case error: 1004 err = v 1005 default: 1006 err = fmt.Errorf("%s", v) 1007 } 1008 } 1009 }() 1010 1011 Repeat(s, count) 1012 1013 return 1014 } 1015 1016 // See Issue golang.org/issue/16237 1017 func TestRepeatCatchesOverflow(t *testing.T) { 1018 tests := [...]struct { 1019 s string 1020 count int 1021 errStr string 1022 }{ 1023 0: {"--", -2147483647, "negative"}, 1024 1: {"", int(^uint(0) >> 1), ""}, 1025 2: {"-", 10, ""}, 1026 3: {"gopher", 0, ""}, 1027 4: {"-", -1, "negative"}, 1028 5: {"--", -102, "negative"}, 1029 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"}, 1030 } 1031 1032 for i, tt := range tests { 1033 err := repeat(tt.s, tt.count) 1034 if tt.errStr == "" { 1035 if err != nil { 1036 t.Errorf("#%d panicked %v", i, err) 1037 } 1038 continue 1039 } 1040 1041 if err == nil || !Contains(err.Error(), tt.errStr) { 1042 t.Errorf("#%d expected %q got %q", i, tt.errStr, err) 1043 } 1044 } 1045 } 1046 1047 func runesEqual(a, b []rune) bool { 1048 if len(a) != len(b) { 1049 return false 1050 } 1051 for i, r := range a { 1052 if r != b[i] { 1053 return false 1054 } 1055 } 1056 return true 1057 } 1058 1059 var RunesTests = []struct { 1060 in string 1061 out []rune 1062 lossy bool 1063 }{ 1064 {"", []rune{}, false}, 1065 {" ", []rune{32}, false}, 1066 {"ABC", []rune{65, 66, 67}, false}, 1067 {"abc", []rune{97, 98, 99}, false}, 1068 {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false}, 1069 {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true}, 1070 {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true}, 1071 } 1072 1073 func TestRunes(t *testing.T) { 1074 for _, tt := range RunesTests { 1075 a := []rune(tt.in) 1076 if !runesEqual(a, tt.out) { 1077 t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out) 1078 continue 1079 } 1080 if !tt.lossy { 1081 // can only test reassembly if we didn't lose information 1082 s := string(a) 1083 if s != tt.in { 1084 t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in) 1085 } 1086 } 1087 } 1088 } 1089 1090 func TestReadByte(t *testing.T) { 1091 testStrings := []string{"", abcd, faces, commas} 1092 for _, s := range testStrings { 1093 reader := NewReader(s) 1094 if e := reader.UnreadByte(); e == nil { 1095 t.Errorf("Unreading %q at beginning: expected error", s) 1096 } 1097 var res bytes.Buffer 1098 for { 1099 b, e := reader.ReadByte() 1100 if e == io.EOF { 1101 break 1102 } 1103 if e != nil { 1104 t.Errorf("Reading %q: %s", s, e) 1105 break 1106 } 1107 res.WriteByte(b) 1108 // unread and read again 1109 e = reader.UnreadByte() 1110 if e != nil { 1111 t.Errorf("Unreading %q: %s", s, e) 1112 break 1113 } 1114 b1, e := reader.ReadByte() 1115 if e != nil { 1116 t.Errorf("Reading %q after unreading: %s", s, e) 1117 break 1118 } 1119 if b1 != b { 1120 t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1) 1121 break 1122 } 1123 } 1124 if res.String() != s { 1125 t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String()) 1126 } 1127 } 1128 } 1129 1130 func TestReadRune(t *testing.T) { 1131 testStrings := []string{"", abcd, faces, commas} 1132 for _, s := range testStrings { 1133 reader := NewReader(s) 1134 if e := reader.UnreadRune(); e == nil { 1135 t.Errorf("Unreading %q at beginning: expected error", s) 1136 } 1137 res := "" 1138 for { 1139 r, z, e := reader.ReadRune() 1140 if e == io.EOF { 1141 break 1142 } 1143 if e != nil { 1144 t.Errorf("Reading %q: %s", s, e) 1145 break 1146 } 1147 res += string(r) 1148 // unread and read again 1149 e = reader.UnreadRune() 1150 if e != nil { 1151 t.Errorf("Unreading %q: %s", s, e) 1152 break 1153 } 1154 r1, z1, e := reader.ReadRune() 1155 if e != nil { 1156 t.Errorf("Reading %q after unreading: %s", s, e) 1157 break 1158 } 1159 if r1 != r { 1160 t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1) 1161 break 1162 } 1163 if z1 != z { 1164 t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1) 1165 break 1166 } 1167 } 1168 if res != s { 1169 t.Errorf("Reader(%q).ReadRune() produced %q", s, res) 1170 } 1171 } 1172 } 1173 1174 var UnreadRuneErrorTests = []struct { 1175 name string 1176 f func(*Reader) 1177 }{ 1178 {"Read", func(r *Reader) { r.Read([]byte{0}) }}, 1179 {"ReadByte", func(r *Reader) { r.ReadByte() }}, 1180 {"UnreadRune", func(r *Reader) { r.UnreadRune() }}, 1181 {"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }}, 1182 {"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }}, 1183 } 1184 1185 func TestUnreadRuneError(t *testing.T) { 1186 for _, tt := range UnreadRuneErrorTests { 1187 reader := NewReader("0123456789") 1188 if _, _, err := reader.ReadRune(); err != nil { 1189 // should not happen 1190 t.Fatal(err) 1191 } 1192 tt.f(reader) 1193 err := reader.UnreadRune() 1194 if err == nil { 1195 t.Errorf("Unreading after %s: expected error", tt.name) 1196 } 1197 } 1198 } 1199 1200 var ReplaceTests = []struct { 1201 in string 1202 old, new string 1203 n int 1204 out string 1205 }{ 1206 {"hello", "l", "L", 0, "hello"}, 1207 {"hello", "l", "L", -1, "heLLo"}, 1208 {"hello", "x", "X", -1, "hello"}, 1209 {"", "x", "X", -1, ""}, 1210 {"radar", "r", "<r>", -1, "<r>ada<r>"}, 1211 {"", "", "<>", -1, "<>"}, 1212 {"banana", "a", "<>", -1, "b<>n<>n<>"}, 1213 {"banana", "a", "<>", 1, "b<>nana"}, 1214 {"banana", "a", "<>", 1000, "b<>n<>n<>"}, 1215 {"banana", "an", "<>", -1, "b<><>a"}, 1216 {"banana", "ana", "<>", -1, "b<>na"}, 1217 {"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"}, 1218 {"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"}, 1219 {"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"}, 1220 {"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"}, 1221 {"banana", "", "<>", 1, "<>banana"}, 1222 {"banana", "a", "a", -1, "banana"}, 1223 {"banana", "a", "a", 1, "banana"}, 1224 {"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"}, 1225 } 1226 1227 func TestReplace(t *testing.T) { 1228 for _, tt := range ReplaceTests { 1229 if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out { 1230 t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out) 1231 } 1232 } 1233 } 1234 1235 var TitleTests = []struct { 1236 in, out string 1237 }{ 1238 {"", ""}, 1239 {"a", "A"}, 1240 {" aaa aaa aaa ", " Aaa Aaa Aaa "}, 1241 {" Aaa Aaa Aaa ", " Aaa Aaa Aaa "}, 1242 {"123a456", "123a456"}, 1243 {"double-blind", "Double-Blind"}, 1244 {"ÿøû", "Ÿøû"}, 1245 {"with_underscore", "With_underscore"}, 1246 {"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"}, 1247 } 1248 1249 func TestTitle(t *testing.T) { 1250 for _, tt := range TitleTests { 1251 if s := Title(tt.in); s != tt.out { 1252 t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out) 1253 } 1254 } 1255 } 1256 1257 var ContainsTests = []struct { 1258 str, substr string 1259 expected bool 1260 }{ 1261 {"abc", "bc", true}, 1262 {"abc", "bcd", false}, 1263 {"abc", "", true}, 1264 {"", "a", false}, 1265 1266 // cases to cover code in runtime/asm_amd64.s:indexShortStr 1267 // 2-byte needle 1268 {"xxxxxx", "01", false}, 1269 {"01xxxx", "01", true}, 1270 {"xx01xx", "01", true}, 1271 {"xxxx01", "01", true}, 1272 {"01xxxxx"[1:], "01", false}, 1273 {"xxxxx01"[:6], "01", false}, 1274 // 3-byte needle 1275 {"xxxxxxx", "012", false}, 1276 {"012xxxx", "012", true}, 1277 {"xx012xx", "012", true}, 1278 {"xxxx012", "012", true}, 1279 {"012xxxxx"[1:], "012", false}, 1280 {"xxxxx012"[:7], "012", false}, 1281 // 4-byte needle 1282 {"xxxxxxxx", "0123", false}, 1283 {"0123xxxx", "0123", true}, 1284 {"xx0123xx", "0123", true}, 1285 {"xxxx0123", "0123", true}, 1286 {"0123xxxxx"[1:], "0123", false}, 1287 {"xxxxx0123"[:8], "0123", false}, 1288 // 5-7-byte needle 1289 {"xxxxxxxxx", "01234", false}, 1290 {"01234xxxx", "01234", true}, 1291 {"xx01234xx", "01234", true}, 1292 {"xxxx01234", "01234", true}, 1293 {"01234xxxxx"[1:], "01234", false}, 1294 {"xxxxx01234"[:9], "01234", false}, 1295 // 8-byte needle 1296 {"xxxxxxxxxxxx", "01234567", false}, 1297 {"01234567xxxx", "01234567", true}, 1298 {"xx01234567xx", "01234567", true}, 1299 {"xxxx01234567", "01234567", true}, 1300 {"01234567xxxxx"[1:], "01234567", false}, 1301 {"xxxxx01234567"[:12], "01234567", false}, 1302 // 9-15-byte needle 1303 {"xxxxxxxxxxxxx", "012345678", false}, 1304 {"012345678xxxx", "012345678", true}, 1305 {"xx012345678xx", "012345678", true}, 1306 {"xxxx012345678", "012345678", true}, 1307 {"012345678xxxxx"[1:], "012345678", false}, 1308 {"xxxxx012345678"[:13], "012345678", false}, 1309 // 16-byte needle 1310 {"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false}, 1311 {"0123456789ABCDEFxxxx", "0123456789ABCDEF", true}, 1312 {"xx0123456789ABCDEFxx", "0123456789ABCDEF", true}, 1313 {"xxxx0123456789ABCDEF", "0123456789ABCDEF", true}, 1314 {"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false}, 1315 {"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false}, 1316 // 17-31-byte needle 1317 {"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false}, 1318 {"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true}, 1319 {"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true}, 1320 {"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true}, 1321 {"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false}, 1322 {"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false}, 1323 1324 // partial match cases 1325 {"xx01x", "012", false}, // 3 1326 {"xx0123x", "01234", false}, // 5-7 1327 {"xx01234567x", "012345678", false}, // 9-15 1328 {"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679 1329 } 1330 1331 func TestContains(t *testing.T) { 1332 for _, ct := range ContainsTests { 1333 if Contains(ct.str, ct.substr) != ct.expected { 1334 t.Errorf("Contains(%s, %s) = %v, want %v", 1335 ct.str, ct.substr, !ct.expected, ct.expected) 1336 } 1337 } 1338 } 1339 1340 var ContainsAnyTests = []struct { 1341 str, substr string 1342 expected bool 1343 }{ 1344 {"", "", false}, 1345 {"", "a", false}, 1346 {"", "abc", false}, 1347 {"a", "", false}, 1348 {"a", "a", true}, 1349 {"aaa", "a", true}, 1350 {"abc", "xyz", false}, 1351 {"abc", "xcz", true}, 1352 {"a☺b☻c☹d", "uvw☻xyz", true}, 1353 {"aRegExp*", ".(|)*+?^$[]", true}, 1354 {dots + dots + dots, " ", false}, 1355 } 1356 1357 func TestContainsAny(t *testing.T) { 1358 for _, ct := range ContainsAnyTests { 1359 if ContainsAny(ct.str, ct.substr) != ct.expected { 1360 t.Errorf("ContainsAny(%s, %s) = %v, want %v", 1361 ct.str, ct.substr, !ct.expected, ct.expected) 1362 } 1363 } 1364 } 1365 1366 var ContainsRuneTests = []struct { 1367 str string 1368 r rune 1369 expected bool 1370 }{ 1371 {"", 'a', false}, 1372 {"a", 'a', true}, 1373 {"aaa", 'a', true}, 1374 {"abc", 'y', false}, 1375 {"abc", 'c', true}, 1376 {"a☺b☻c☹d", 'x', false}, 1377 {"a☺b☻c☹d", '☻', true}, 1378 {"aRegExp*", '*', true}, 1379 } 1380 1381 func TestContainsRune(t *testing.T) { 1382 for _, ct := range ContainsRuneTests { 1383 if ContainsRune(ct.str, ct.r) != ct.expected { 1384 t.Errorf("ContainsRune(%q, %q) = %v, want %v", 1385 ct.str, ct.r, !ct.expected, ct.expected) 1386 } 1387 } 1388 } 1389 1390 var EqualFoldTests = []struct { 1391 s, t string 1392 out bool 1393 }{ 1394 {"abc", "abc", true}, 1395 {"ABcd", "ABcd", true}, 1396 {"123abc", "123ABC", true}, 1397 {"αβδ", "ΑΒΔ", true}, 1398 {"abc", "xyz", false}, 1399 {"abc", "XYZ", false}, 1400 {"abcdefghijk", "abcdefghijX", false}, 1401 {"abcdefghijk", "abcdefghij\u212A", true}, 1402 {"abcdefghijK", "abcdefghij\u212A", true}, 1403 {"abcdefghijkz", "abcdefghij\u212Ay", false}, 1404 {"abcdefghijKz", "abcdefghij\u212Ay", false}, 1405 } 1406 1407 func TestEqualFold(t *testing.T) { 1408 for _, tt := range EqualFoldTests { 1409 if out := EqualFold(tt.s, tt.t); out != tt.out { 1410 t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out) 1411 } 1412 if out := EqualFold(tt.t, tt.s); out != tt.out { 1413 t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out) 1414 } 1415 } 1416 } 1417 1418 var CountTests = []struct { 1419 s, sep string 1420 num int 1421 }{ 1422 {"", "", 1}, 1423 {"", "notempty", 0}, 1424 {"notempty", "", 9}, 1425 {"smaller", "not smaller", 0}, 1426 {"12345678987654321", "6", 2}, 1427 {"611161116", "6", 3}, 1428 {"notequal", "NotEqual", 0}, 1429 {"equal", "equal", 1}, 1430 {"abc1231231123q", "123", 3}, 1431 {"11111", "11", 2}, 1432 } 1433 1434 func TestCount(t *testing.T) { 1435 for _, tt := range CountTests { 1436 if num := Count(tt.s, tt.sep); num != tt.num { 1437 t.Errorf("Count(\"%s\", \"%s\") = %d, want %d", tt.s, tt.sep, num, tt.num) 1438 } 1439 } 1440 } 1441 1442 func makeBenchInputHard() string { 1443 tokens := [...]string{ 1444 "<a>", "<p>", "<b>", "<strong>", 1445 "</a>", "</p>", "</b>", "</strong>", 1446 "hello", "world", 1447 } 1448 x := make([]byte, 0, 1<<20) 1449 for { 1450 i := rand.Intn(len(tokens)) 1451 if len(x)+len(tokens[i]) >= 1<<20 { 1452 break 1453 } 1454 x = append(x, tokens[i]...) 1455 } 1456 return string(x) 1457 } 1458 1459 var benchInputHard = makeBenchInputHard() 1460 1461 func benchmarkIndexHard(b *testing.B, sep string) { 1462 for i := 0; i < b.N; i++ { 1463 Index(benchInputHard, sep) 1464 } 1465 } 1466 1467 func benchmarkLastIndexHard(b *testing.B, sep string) { 1468 for i := 0; i < b.N; i++ { 1469 LastIndex(benchInputHard, sep) 1470 } 1471 } 1472 1473 func benchmarkCountHard(b *testing.B, sep string) { 1474 for i := 0; i < b.N; i++ { 1475 Count(benchInputHard, sep) 1476 } 1477 } 1478 1479 func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") } 1480 func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") } 1481 func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") } 1482 func BenchmarkIndexHard4(b *testing.B) { 1483 benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>") 1484 } 1485 1486 func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") } 1487 func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") } 1488 func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") } 1489 1490 func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") } 1491 func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") } 1492 func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") } 1493 1494 var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10) 1495 var benchNeedleTorture = Repeat("ABC", 1<<10+1) 1496 1497 func BenchmarkIndexTorture(b *testing.B) { 1498 for i := 0; i < b.N; i++ { 1499 Index(benchInputTorture, benchNeedleTorture) 1500 } 1501 } 1502 1503 func BenchmarkCountTorture(b *testing.B) { 1504 for i := 0; i < b.N; i++ { 1505 Count(benchInputTorture, benchNeedleTorture) 1506 } 1507 } 1508 1509 func BenchmarkCountTortureOverlapping(b *testing.B) { 1510 A := Repeat("ABC", 1<<20) 1511 B := Repeat("ABC", 1<<10) 1512 for i := 0; i < b.N; i++ { 1513 Count(A, B) 1514 } 1515 } 1516 1517 func BenchmarkCountByte(b *testing.B) { 1518 indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20} 1519 benchStr := Repeat(benchmarkString, 1520 (indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString)) 1521 benchFunc := func(b *testing.B, benchStr string) { 1522 b.SetBytes(int64(len(benchStr))) 1523 for i := 0; i < b.N; i++ { 1524 Count(benchStr, "=") 1525 } 1526 } 1527 for _, size := range indexSizes { 1528 b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { 1529 benchFunc(b, benchStr[:size]) 1530 }) 1531 } 1532 1533 } 1534 1535 var makeFieldsInput = func() string { 1536 x := make([]byte, 1<<20) 1537 // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space. 1538 for i := range x { 1539 switch rand.Intn(10) { 1540 case 0: 1541 x[i] = ' ' 1542 case 1: 1543 if i > 0 && x[i-1] == 'x' { 1544 copy(x[i-1:], "χ") 1545 break 1546 } 1547 fallthrough 1548 default: 1549 x[i] = 'x' 1550 } 1551 } 1552 return string(x) 1553 } 1554 1555 var makeFieldsInputASCII = func() string { 1556 x := make([]byte, 1<<20) 1557 // Input is ~10% space, rest ASCII non-space. 1558 for i := range x { 1559 if rand.Intn(10) == 0 { 1560 x[i] = ' ' 1561 } else { 1562 x[i] = 'x' 1563 } 1564 } 1565 return string(x) 1566 } 1567 1568 var stringdata = []struct{ name, data string }{ 1569 {"ASCII", makeFieldsInputASCII()}, 1570 {"Mixed", makeFieldsInput()}, 1571 } 1572 1573 func BenchmarkFields(b *testing.B) { 1574 for _, sd := range stringdata { 1575 b.Run(sd.name, func(b *testing.B) { 1576 for j := 1 << 4; j <= 1<<20; j <<= 4 { 1577 b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { 1578 b.ReportAllocs() 1579 b.SetBytes(int64(j)) 1580 data := sd.data[:j] 1581 for i := 0; i < b.N; i++ { 1582 Fields(data) 1583 } 1584 }) 1585 } 1586 }) 1587 } 1588 } 1589 1590 func BenchmarkFieldsFunc(b *testing.B) { 1591 for _, sd := range stringdata { 1592 b.Run(sd.name, func(b *testing.B) { 1593 for j := 1 << 4; j <= 1<<20; j <<= 4 { 1594 b.Run(fmt.Sprintf("%d", j), func(b *testing.B) { 1595 b.ReportAllocs() 1596 b.SetBytes(int64(j)) 1597 data := sd.data[:j] 1598 for i := 0; i < b.N; i++ { 1599 FieldsFunc(data, unicode.IsSpace) 1600 } 1601 }) 1602 } 1603 }) 1604 } 1605 } 1606 1607 func BenchmarkSplitEmptySeparator(b *testing.B) { 1608 for i := 0; i < b.N; i++ { 1609 Split(benchInputHard, "") 1610 } 1611 } 1612 1613 func BenchmarkSplitSingleByteSeparator(b *testing.B) { 1614 for i := 0; i < b.N; i++ { 1615 Split(benchInputHard, "/") 1616 } 1617 } 1618 1619 func BenchmarkSplitMultiByteSeparator(b *testing.B) { 1620 for i := 0; i < b.N; i++ { 1621 Split(benchInputHard, "hello") 1622 } 1623 } 1624 1625 func BenchmarkSplitNSingleByteSeparator(b *testing.B) { 1626 for i := 0; i < b.N; i++ { 1627 SplitN(benchInputHard, "/", 10) 1628 } 1629 } 1630 1631 func BenchmarkSplitNMultiByteSeparator(b *testing.B) { 1632 for i := 0; i < b.N; i++ { 1633 SplitN(benchInputHard, "hello", 10) 1634 } 1635 } 1636 1637 func BenchmarkRepeat(b *testing.B) { 1638 for i := 0; i < b.N; i++ { 1639 Repeat("-", 80) 1640 } 1641 } 1642 1643 func BenchmarkIndexAnyASCII(b *testing.B) { 1644 x := Repeat("#", 4096) // Never matches set 1645 cs := "0123456789abcdef" 1646 for k := 1; k <= 4096; k <<= 4 { 1647 for j := 1; j <= 16; j <<= 1 { 1648 b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { 1649 for i := 0; i < b.N; i++ { 1650 IndexAny(x[:k], cs[:j]) 1651 } 1652 }) 1653 } 1654 } 1655 } 1656 1657 func BenchmarkTrimASCII(b *testing.B) { 1658 cs := "0123456789abcdef" 1659 for k := 1; k <= 4096; k <<= 4 { 1660 for j := 1; j <= 16; j <<= 1 { 1661 b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) { 1662 x := Repeat(cs[:j], k) // Always matches set 1663 for i := 0; i < b.N; i++ { 1664 Trim(x[:k], cs[:j]) 1665 } 1666 }) 1667 } 1668 } 1669 } 1670 1671 func BenchmarkIndexPeriodic(b *testing.B) { 1672 key := "aa" 1673 for _, skip := range [...]int{2, 4, 8, 16, 32, 64} { 1674 b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) { 1675 s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip) 1676 for i := 0; i < b.N; i++ { 1677 Index(s, key) 1678 } 1679 }) 1680 } 1681 }