github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/strings/strings.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package strings implements simple functions to manipulate UTF-8 encoded strings. 6 // 7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8 package strings 9 10 import ( 11 "internal/bytealg" 12 "unicode" 13 "unicode/utf8" 14 ) 15 16 // explode splits s into a slice of UTF-8 strings, 17 // one string per Unicode character up to a maximum of n (n < 0 means no limit). 18 // Invalid UTF-8 sequences become correct encodings of U+FFFD. 19 func explode(s string, n int) []string { 20 l := utf8.RuneCountInString(s) 21 if n < 0 || n > l { 22 n = l 23 } 24 a := make([]string, n) 25 for i := 0; i < n-1; i++ { 26 ch, size := utf8.DecodeRuneInString(s) 27 a[i] = s[:size] 28 s = s[size:] 29 if ch == utf8.RuneError { 30 a[i] = string(utf8.RuneError) 31 } 32 } 33 if n > 0 { 34 a[n-1] = s 35 } 36 return a 37 } 38 39 // primeRK is the prime base used in Rabin-Karp algorithm. 40 const primeRK = 16777619 41 42 // hashStr returns the hash and the appropriate multiplicative 43 // factor for use in Rabin-Karp algorithm. 44 func hashStr(sep string) (uint32, uint32) { 45 hash := uint32(0) 46 for i := 0; i < len(sep); i++ { 47 hash = hash*primeRK + uint32(sep[i]) 48 } 49 var pow, sq uint32 = 1, primeRK 50 for i := len(sep); i > 0; i >>= 1 { 51 if i&1 != 0 { 52 pow *= sq 53 } 54 sq *= sq 55 } 56 return hash, pow 57 } 58 59 // hashStrRev returns the hash of the reverse of sep and the 60 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 
61 func hashStrRev(sep string) (uint32, uint32) { 62 hash := uint32(0) 63 for i := len(sep) - 1; i >= 0; i-- { 64 hash = hash*primeRK + uint32(sep[i]) 65 } 66 var pow, sq uint32 = 1, primeRK 67 for i := len(sep); i > 0; i >>= 1 { 68 if i&1 != 0 { 69 pow *= sq 70 } 71 sq *= sq 72 } 73 return hash, pow 74 } 75 76 // Count counts the number of non-overlapping instances of substr in s. 77 // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 78 func Count(s, substr string) int { 79 // special case 80 if len(substr) == 0 { 81 return utf8.RuneCountInString(s) + 1 82 } 83 if len(substr) == 1 { 84 return bytealg.CountString(s, substr[0]) 85 } 86 n := 0 87 for { 88 i := Index(s, substr) 89 if i == -1 { 90 return n 91 } 92 n++ 93 s = s[i+len(substr):] 94 } 95 } 96 97 // Contains reports whether substr is within s. 98 func Contains(s, substr string) bool { 99 return Index(s, substr) >= 0 100 } 101 102 // ContainsAny reports whether any Unicode code points in chars are within s. 103 func ContainsAny(s, chars string) bool { 104 return IndexAny(s, chars) >= 0 105 } 106 107 // ContainsRune reports whether the Unicode code point r is within s. 108 func ContainsRune(s string, r rune) bool { 109 return IndexRune(s, r) >= 0 110 } 111 112 // LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. 
113 func LastIndex(s, substr string) int { 114 n := len(substr) 115 switch { 116 case n == 0: 117 return len(s) 118 case n == 1: 119 return LastIndexByte(s, substr[0]) 120 case n == len(s): 121 if substr == s { 122 return 0 123 } 124 return -1 125 case n > len(s): 126 return -1 127 } 128 // Rabin-Karp search from the end of the string 129 hashss, pow := hashStrRev(substr) 130 last := len(s) - n 131 var h uint32 132 for i := len(s) - 1; i >= last; i-- { 133 h = h*primeRK + uint32(s[i]) 134 } 135 if h == hashss && s[last:] == substr { 136 return last 137 } 138 for i := last - 1; i >= 0; i-- { 139 h *= primeRK 140 h += uint32(s[i]) 141 h -= pow * uint32(s[i+n]) 142 if h == hashss && s[i:i+n] == substr { 143 return i 144 } 145 } 146 return -1 147 } 148 149 // IndexRune returns the index of the first instance of the Unicode code point 150 // r, or -1 if rune is not present in s. 151 // If r is utf8.RuneError, it returns the first instance of any 152 // invalid UTF-8 byte sequence. 153 func IndexRune(s string, r rune) int { 154 switch { 155 case 0 <= r && r < utf8.RuneSelf: 156 return IndexByte(s, byte(r)) 157 case r == utf8.RuneError: 158 for i, r := range s { 159 if r == utf8.RuneError { 160 return i 161 } 162 } 163 return -1 164 case !utf8.ValidRune(r): 165 return -1 166 default: 167 return Index(s, string(r)) 168 } 169 } 170 171 // IndexAny returns the index of the first instance of any Unicode code point 172 // from chars in s, or -1 if no Unicode code point from chars is present in s. 173 func IndexAny(s, chars string) int { 174 if chars == "" { 175 // Avoid scanning all of s. 
176 return -1 177 } 178 if len(s) > 8 { 179 if as, isASCII := makeASCIISet(chars); isASCII { 180 for i := 0; i < len(s); i++ { 181 if as.contains(s[i]) { 182 return i 183 } 184 } 185 return -1 186 } 187 } 188 for i, c := range s { 189 for _, m := range chars { 190 if c == m { 191 return i 192 } 193 } 194 } 195 return -1 196 } 197 198 // LastIndexAny returns the index of the last instance of any Unicode code 199 // point from chars in s, or -1 if no Unicode code point from chars is 200 // present in s. 201 func LastIndexAny(s, chars string) int { 202 if chars == "" { 203 // Avoid scanning all of s. 204 return -1 205 } 206 if len(s) > 8 { 207 if as, isASCII := makeASCIISet(chars); isASCII { 208 for i := len(s) - 1; i >= 0; i-- { 209 if as.contains(s[i]) { 210 return i 211 } 212 } 213 return -1 214 } 215 } 216 for i := len(s); i > 0; { 217 r, size := utf8.DecodeLastRuneInString(s[:i]) 218 i -= size 219 for _, c := range chars { 220 if r == c { 221 return i 222 } 223 } 224 } 225 return -1 226 } 227 228 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 229 func LastIndexByte(s string, c byte) int { 230 for i := len(s) - 1; i >= 0; i-- { 231 if s[i] == c { 232 return i 233 } 234 } 235 return -1 236 } 237 238 // Generic split: splits after each instance of sep, 239 // including sepSave bytes of sep in the subarrays. 240 func genSplit(s, sep string, sepSave, n int) []string { 241 if n == 0 { 242 return nil 243 } 244 if sep == "" { 245 return explode(s, n) 246 } 247 if n < 0 { 248 n = Count(s, sep) + 1 249 } 250 251 a := make([]string, n) 252 n-- 253 i := 0 254 for i < n { 255 m := Index(s, sep) 256 if m < 0 { 257 break 258 } 259 a[i] = s[:m+sepSave] 260 s = s[m+len(sep):] 261 i++ 262 } 263 a[i] = s 264 return a[:i+1] 265 } 266 267 // SplitN slices s into substrings separated by sep and returns a slice of 268 // the substrings between those separators. 
269 // 270 // The count determines the number of substrings to return: 271 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 272 // n == 0: the result is nil (zero substrings) 273 // n < 0: all substrings 274 // 275 // Edge cases for s and sep (for example, empty strings) are handled 276 // as described in the documentation for Split. 277 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 278 279 // SplitAfterN slices s into substrings after each instance of sep and 280 // returns a slice of those substrings. 281 // 282 // The count determines the number of substrings to return: 283 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 284 // n == 0: the result is nil (zero substrings) 285 // n < 0: all substrings 286 // 287 // Edge cases for s and sep (for example, empty strings) are handled 288 // as described in the documentation for SplitAfter. 289 func SplitAfterN(s, sep string, n int) []string { 290 return genSplit(s, sep, len(sep), n) 291 } 292 293 // Split slices s into all substrings separated by sep and returns a slice of 294 // the substrings between those separators. 295 // 296 // If s does not contain sep and sep is not empty, Split returns a 297 // slice of length 1 whose only element is s. 298 // 299 // If sep is empty, Split splits after each UTF-8 sequence. If both s 300 // and sep are empty, Split returns an empty slice. 301 // 302 // It is equivalent to SplitN with a count of -1. 303 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 304 305 // SplitAfter slices s into all substrings after each instance of sep and 306 // returns a slice of those substrings. 307 // 308 // If s does not contain sep and sep is not empty, SplitAfter returns 309 // a slice of length 1 whose only element is s. 310 // 311 // If sep is empty, SplitAfter splits after each UTF-8 sequence. If 312 // both s and sep are empty, SplitAfter returns an empty slice. 
313 // 314 // It is equivalent to SplitAfterN with a count of -1. 315 func SplitAfter(s, sep string) []string { 316 return genSplit(s, sep, len(sep), -1) 317 } 318 319 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 320 321 // Fields splits the string s around each instance of one or more consecutive white space 322 // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an 323 // empty slice if s contains only white space. 324 func Fields(s string) []string { 325 // First count the fields. 326 // This is an exact count if s is ASCII, otherwise it is an approximation. 327 n := 0 328 wasSpace := 1 329 // setBits is used to track which bits are set in the bytes of s. 330 setBits := uint8(0) 331 for i := 0; i < len(s); i++ { 332 r := s[i] 333 setBits |= r 334 isSpace := int(asciiSpace[r]) 335 n += wasSpace & ^isSpace 336 wasSpace = isSpace 337 } 338 339 if setBits < utf8.RuneSelf { // ASCII fast path 340 a := make([]string, n) 341 na := 0 342 fieldStart := 0 343 i := 0 344 // Skip spaces in the front of the input. 345 for i < len(s) && asciiSpace[s[i]] != 0 { 346 i++ 347 } 348 fieldStart = i 349 for i < len(s) { 350 if asciiSpace[s[i]] == 0 { 351 i++ 352 continue 353 } 354 a[na] = s[fieldStart:i] 355 na++ 356 i++ 357 // Skip spaces in between fields. 358 for i < len(s) && asciiSpace[s[i]] != 0 { 359 i++ 360 } 361 fieldStart = i 362 } 363 if fieldStart < len(s) { // Last field might end at EOF. 364 a[na] = s[fieldStart:] 365 } 366 return a 367 } 368 369 // Some runes in the input string are not ASCII. 370 return FieldsFunc(s, unicode.IsSpace) 371 } 372 373 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 374 // and returns an array of slices of s. If all code points in s satisfy f(c) or the 375 // string is empty, an empty slice is returned. 376 // FieldsFunc makes no guarantees about the order in which it calls f(c). 
377 // If f does not return consistent results for a given c, FieldsFunc may crash. 378 func FieldsFunc(s string, f func(rune) bool) []string { 379 // A span is used to record a slice of s of the form s[start:end]. 380 // The start index is inclusive and the end index is exclusive. 381 type span struct { 382 start int 383 end int 384 } 385 spans := make([]span, 0, 32) 386 387 // Find the field start and end indices. 388 wasField := false 389 fromIndex := 0 390 for i, rune := range s { 391 if f(rune) { 392 if wasField { 393 spans = append(spans, span{start: fromIndex, end: i}) 394 wasField = false 395 } 396 } else { 397 if !wasField { 398 fromIndex = i 399 wasField = true 400 } 401 } 402 } 403 404 // Last field might end at EOF. 405 if wasField { 406 spans = append(spans, span{fromIndex, len(s)}) 407 } 408 409 // Create strings from recorded field indices. 410 a := make([]string, len(spans)) 411 for i, span := range spans { 412 a[i] = s[span.start:span.end] 413 } 414 415 return a 416 } 417 418 // Join concatenates the elements of a to create a single string. The separator string 419 // sep is placed between elements in the resulting string. 420 func Join(a []string, sep string) string { 421 switch len(a) { 422 case 0: 423 return "" 424 case 1: 425 return a[0] 426 } 427 n := len(sep) * (len(a) - 1) 428 for i := 0; i < len(a); i++ { 429 n += len(a[i]) 430 } 431 432 var b Builder 433 b.Grow(n) 434 b.WriteString(a[0]) 435 for _, s := range a[1:] { 436 b.WriteString(sep) 437 b.WriteString(s) 438 } 439 return b.String() 440 } 441 442 // HasPrefix tests whether the string s begins with prefix. 443 func HasPrefix(s, prefix string) bool { 444 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 445 } 446 447 // HasSuffix tests whether the string s ends with suffix. 
448 func HasSuffix(s, suffix string) bool { 449 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 450 } 451 452 // Map returns a copy of the string s with all its characters modified 453 // according to the mapping function. If mapping returns a negative value, the character is 454 // dropped from the string with no replacement. 455 func Map(mapping func(rune) rune, s string) string { 456 // In the worst case, the string can grow when mapped, making 457 // things unpleasant. But it's so rare we barge in assuming it's 458 // fine. It could also shrink but that falls out naturally. 459 460 // The output buffer b is initialized on demand, the first 461 // time a character differs. 462 var b Builder 463 464 for i, c := range s { 465 r := mapping(c) 466 if r == c && c != utf8.RuneError { 467 continue 468 } 469 470 var width int 471 if c == utf8.RuneError { 472 c, width = utf8.DecodeRuneInString(s[i:]) 473 if width != 1 && r == c { 474 continue 475 } 476 } else { 477 width = utf8.RuneLen(c) 478 } 479 480 b.Grow(len(s) + utf8.UTFMax) 481 b.WriteString(s[:i]) 482 if r >= 0 { 483 b.WriteRune(r) 484 } 485 486 s = s[i+width:] 487 break 488 } 489 490 // Fast path for unchanged input 491 if b.Cap() == 0 { // didn't call b.Grow above 492 return s 493 } 494 495 for _, c := range s { 496 r := mapping(c) 497 498 if r >= 0 { 499 // common case 500 // Due to inlining, it is more performant to determine if WriteByte should be 501 // invoked rather than always call WriteRune 502 if r < utf8.RuneSelf { 503 b.WriteByte(byte(r)) 504 } else { 505 // r is not a ASCII rune. 506 b.WriteRune(r) 507 } 508 } 509 } 510 511 return b.String() 512 } 513 514 // Repeat returns a new string consisting of count copies of the string s. 515 // 516 // It panics if count is negative or if 517 // the result of (len(s) * count) overflows. 
518 func Repeat(s string, count int) string { 519 if count == 0 { 520 return "" 521 } 522 523 // Since we cannot return an error on overflow, 524 // we should panic if the repeat will generate 525 // an overflow. 526 // See Issue golang.org/issue/16237 527 if count < 0 { 528 panic("strings: negative Repeat count") 529 } else if len(s)*count/count != len(s) { 530 panic("strings: Repeat count causes overflow") 531 } 532 533 n := len(s) * count 534 var b Builder 535 b.Grow(n) 536 b.WriteString(s) 537 for b.Len() < n { 538 if b.Len() <= n/2 { 539 b.WriteString(b.String()) 540 } else { 541 b.WriteString(b.String()[:n-b.Len()]) 542 break 543 } 544 } 545 return b.String() 546 } 547 548 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case. 549 func ToUpper(s string) string { 550 isASCII, hasLower := true, false 551 for i := 0; i < len(s); i++ { 552 c := s[i] 553 if c >= utf8.RuneSelf { 554 isASCII = false 555 break 556 } 557 hasLower = hasLower || (c >= 'a' && c <= 'z') 558 } 559 560 if isASCII { // optimize for ASCII-only strings. 561 if !hasLower { 562 return s 563 } 564 var b Builder 565 b.Grow(len(s)) 566 for i := 0; i < len(s); i++ { 567 c := s[i] 568 if c >= 'a' && c <= 'z' { 569 c -= 'a' - 'A' 570 } 571 b.WriteByte(c) 572 } 573 return b.String() 574 } 575 return Map(unicode.ToUpper, s) 576 } 577 578 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case. 579 func ToLower(s string) string { 580 isASCII, hasUpper := true, false 581 for i := 0; i < len(s); i++ { 582 c := s[i] 583 if c >= utf8.RuneSelf { 584 isASCII = false 585 break 586 } 587 hasUpper = hasUpper || (c >= 'A' && c <= 'Z') 588 } 589 590 if isASCII { // optimize for ASCII-only strings. 
591 if !hasUpper { 592 return s 593 } 594 var b Builder 595 b.Grow(len(s)) 596 for i := 0; i < len(s); i++ { 597 c := s[i] 598 if c >= 'A' && c <= 'Z' { 599 c += 'a' - 'A' 600 } 601 b.WriteByte(c) 602 } 603 return b.String() 604 } 605 return Map(unicode.ToLower, s) 606 } 607 608 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case. 609 func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 610 611 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 612 // upper case using the case mapping specified by c. 613 func ToUpperSpecial(c unicode.SpecialCase, s string) string { 614 return Map(c.ToUpper, s) 615 } 616 617 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 618 // lower case using the case mapping specified by c. 619 func ToLowerSpecial(c unicode.SpecialCase, s string) string { 620 return Map(c.ToLower, s) 621 } 622 623 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 624 // title case, giving priority to the special casing rules. 625 func ToTitleSpecial(c unicode.SpecialCase, s string) string { 626 return Map(c.ToTitle, s) 627 } 628 629 // isSeparator reports whether the rune could mark a word boundary. 630 // TODO: update when package unicode captures more of the properties. 631 func isSeparator(r rune) bool { 632 // ASCII alphanumerics and underscore are not separators 633 if r <= 0x7F { 634 switch { 635 case '0' <= r && r <= '9': 636 return false 637 case 'a' <= r && r <= 'z': 638 return false 639 case 'A' <= r && r <= 'Z': 640 return false 641 case r == '_': 642 return false 643 } 644 return true 645 } 646 // Letters and digits are not separators 647 if unicode.IsLetter(r) || unicode.IsDigit(r) { 648 return false 649 } 650 // Otherwise, all we can do for now is treat spaces as separators. 
651 return unicode.IsSpace(r) 652 } 653 654 // Title returns a copy of the string s with all Unicode letters that begin words 655 // mapped to their title case. 656 // 657 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 658 func Title(s string) string { 659 // Use a closure here to remember state. 660 // Hackish but effective. Depends on Map scanning in order and calling 661 // the closure once per rune. 662 prev := ' ' 663 return Map( 664 func(r rune) rune { 665 if isSeparator(prev) { 666 prev = r 667 return unicode.ToTitle(r) 668 } 669 prev = r 670 return r 671 }, 672 s) 673 } 674 675 // TrimLeftFunc returns a slice of the string s with all leading 676 // Unicode code points c satisfying f(c) removed. 677 func TrimLeftFunc(s string, f func(rune) bool) string { 678 i := indexFunc(s, f, false) 679 if i == -1 { 680 return "" 681 } 682 return s[i:] 683 } 684 685 // TrimRightFunc returns a slice of the string s with all trailing 686 // Unicode code points c satisfying f(c) removed. 687 func TrimRightFunc(s string, f func(rune) bool) string { 688 i := lastIndexFunc(s, f, false) 689 if i >= 0 && s[i] >= utf8.RuneSelf { 690 _, wid := utf8.DecodeRuneInString(s[i:]) 691 i += wid 692 } else { 693 i++ 694 } 695 return s[0:i] 696 } 697 698 // TrimFunc returns a slice of the string s with all leading 699 // and trailing Unicode code points c satisfying f(c) removed. 700 func TrimFunc(s string, f func(rune) bool) string { 701 return TrimRightFunc(TrimLeftFunc(s, f), f) 702 } 703 704 // IndexFunc returns the index into s of the first Unicode 705 // code point satisfying f(c), or -1 if none do. 706 func IndexFunc(s string, f func(rune) bool) int { 707 return indexFunc(s, f, true) 708 } 709 710 // LastIndexFunc returns the index into s of the last 711 // Unicode code point satisfying f(c), or -1 if none do. 
func LastIndexFunc(s string, f func(rune) bool) int {
	return lastIndexFunc(s, f, true)
}

// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func indexFunc(s string, f func(rune) bool, truth bool) int {
	for i, r := range s {
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// Walk s backwards one rune at a time; after the decrement i is the
	// byte offset at which the decoded rune starts.
	for i := len(s); i > 0; {
		r, size := utf8.DecodeLastRuneInString(s[:i])
		i -= size
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The 128-bits of the lower 16 bytes,
// starting with the least-significant bit of the lowest word to the
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII.
//
// When a non-ASCII byte is encountered the set built so far is returned
// together with ok == false.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		// Word index is c/32, bit index within the word is c%32.
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// contains reports whether c is inside the set.
764 func (as *asciiSet) contains(c byte) bool { 765 return (as[c>>5] & (1 << uint(c&31))) != 0 766 } 767 768 func makeCutsetFunc(cutset string) func(rune) bool { 769 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 770 return func(r rune) bool { 771 return r == rune(cutset[0]) 772 } 773 } 774 if as, isASCII := makeASCIISet(cutset); isASCII { 775 return func(r rune) bool { 776 return r < utf8.RuneSelf && as.contains(byte(r)) 777 } 778 } 779 return func(r rune) bool { return IndexRune(cutset, r) >= 0 } 780 } 781 782 // Trim returns a slice of the string s with all leading and 783 // trailing Unicode code points contained in cutset removed. 784 func Trim(s string, cutset string) string { 785 if s == "" || cutset == "" { 786 return s 787 } 788 return TrimFunc(s, makeCutsetFunc(cutset)) 789 } 790 791 // TrimLeft returns a slice of the string s with all leading 792 // Unicode code points contained in cutset removed. 793 // 794 // To remove a prefix, use TrimPrefix instead. 795 func TrimLeft(s string, cutset string) string { 796 if s == "" || cutset == "" { 797 return s 798 } 799 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 800 } 801 802 // TrimRight returns a slice of the string s, with all trailing 803 // Unicode code points contained in cutset removed. 804 // 805 // To remove a suffix, use TrimSuffix instead. 806 func TrimRight(s string, cutset string) string { 807 if s == "" || cutset == "" { 808 return s 809 } 810 return TrimRightFunc(s, makeCutsetFunc(cutset)) 811 } 812 813 // TrimSpace returns a slice of the string s, with all leading 814 // and trailing white space removed, as defined by Unicode. 815 func TrimSpace(s string) string { 816 return TrimFunc(s, unicode.IsSpace) 817 } 818 819 // TrimPrefix returns s without the provided leading prefix string. 820 // If s doesn't start with prefix, s is returned unchanged. 
821 func TrimPrefix(s, prefix string) string { 822 if HasPrefix(s, prefix) { 823 return s[len(prefix):] 824 } 825 return s 826 } 827 828 // TrimSuffix returns s without the provided trailing suffix string. 829 // If s doesn't end with suffix, s is returned unchanged. 830 func TrimSuffix(s, suffix string) string { 831 if HasSuffix(s, suffix) { 832 return s[:len(s)-len(suffix)] 833 } 834 return s 835 } 836 837 // Replace returns a copy of the string s with the first n 838 // non-overlapping instances of old replaced by new. 839 // If old is empty, it matches at the beginning of the string 840 // and after each UTF-8 sequence, yielding up to k+1 replacements 841 // for a k-rune string. 842 // If n < 0, there is no limit on the number of replacements. 843 func Replace(s, old, new string, n int) string { 844 if old == new || n == 0 { 845 return s // avoid allocation 846 } 847 848 // Compute number of replacements. 849 if m := Count(s, old); m == 0 { 850 return s // avoid allocation 851 } else if n < 0 || m < n { 852 n = m 853 } 854 855 // Apply replacements to buffer. 856 t := make([]byte, len(s)+n*(len(new)-len(old))) 857 w := 0 858 start := 0 859 for i := 0; i < n; i++ { 860 j := start 861 if len(old) == 0 { 862 if i > 0 { 863 _, wid := utf8.DecodeRuneInString(s[start:]) 864 j += wid 865 } 866 } else { 867 j += Index(s[start:], old) 868 } 869 w += copy(t[w:], s[start:j]) 870 w += copy(t[w:], new) 871 start = j + len(old) 872 } 873 w += copy(t[w:], s[start:]) 874 return string(t[0:w]) 875 } 876 877 // ReplaceAll returns a copy of the string s with all 878 // non-overlapping instances of old replaced by new. 879 // If old is empty, it matches at the beginning of the string 880 // and after each UTF-8 sequence, yielding up to k+1 replacements 881 // for a k-rune string. 
882 func ReplaceAll(s, old, new string) string { 883 return Replace(s, old, new, -1) 884 } 885 886 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 887 // are equal under Unicode case-folding. 888 func EqualFold(s, t string) bool { 889 for s != "" && t != "" { 890 // Extract first rune from each string. 891 var sr, tr rune 892 if s[0] < utf8.RuneSelf { 893 sr, s = rune(s[0]), s[1:] 894 } else { 895 r, size := utf8.DecodeRuneInString(s) 896 sr, s = r, s[size:] 897 } 898 if t[0] < utf8.RuneSelf { 899 tr, t = rune(t[0]), t[1:] 900 } else { 901 r, size := utf8.DecodeRuneInString(t) 902 tr, t = r, t[size:] 903 } 904 905 // If they match, keep going; if not, return false. 906 907 // Easy case. 908 if tr == sr { 909 continue 910 } 911 912 // Make sr < tr to simplify what follows. 913 if tr < sr { 914 tr, sr = sr, tr 915 } 916 // Fast check for ASCII. 917 if tr < utf8.RuneSelf { 918 // ASCII only, sr/tr must be upper/lower case 919 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 920 continue 921 } 922 return false 923 } 924 925 // General case. SimpleFold(x) returns the next equivalent rune > x 926 // or wraps around to smaller values. 927 r := unicode.SimpleFold(sr) 928 for r != sr && r < tr { 929 r = unicode.SimpleFold(r) 930 } 931 if r == tr { 932 continue 933 } 934 return false 935 } 936 937 // One string is empty. Are both? 938 return s == t 939 } 940 941 // Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. 
942 func Index(s, substr string) int { 943 n := len(substr) 944 switch { 945 case n == 0: 946 return 0 947 case n == 1: 948 return IndexByte(s, substr[0]) 949 case n == len(s): 950 if substr == s { 951 return 0 952 } 953 return -1 954 case n > len(s): 955 return -1 956 case n <= bytealg.MaxLen: 957 // Use brute force when s and substr both are small 958 if len(s) <= bytealg.MaxBruteForce { 959 return bytealg.IndexString(s, substr) 960 } 961 c0 := substr[0] 962 c1 := substr[1] 963 i := 0 964 t := len(s) - n + 1 965 fails := 0 966 for i < t { 967 if s[i] != c0 { 968 // IndexByte is faster than bytealg.IndexString, so use it as long as 969 // we're not getting lots of false positives. 970 o := IndexByte(s[i:t], c0) 971 if o < 0 { 972 return -1 973 } 974 i += o 975 } 976 if s[i+1] == c1 && s[i:i+n] == substr { 977 return i 978 } 979 fails++ 980 i++ 981 // Switch to bytealg.IndexString when IndexByte produces too many false positives. 982 if fails > bytealg.Cutover(i) { 983 r := bytealg.IndexString(s[i:], substr) 984 if r >= 0 { 985 return r + i 986 } 987 return -1 988 } 989 } 990 return -1 991 } 992 c0 := substr[0] 993 c1 := substr[1] 994 i := 0 995 t := len(s) - n + 1 996 fails := 0 997 for i < t { 998 if s[i] != c0 { 999 o := IndexByte(s[i:t], c0) 1000 if o < 0 { 1001 return -1 1002 } 1003 i += o 1004 } 1005 if s[i+1] == c1 && s[i:i+n] == substr { 1006 return i 1007 } 1008 i++ 1009 fails++ 1010 if fails >= 4+i>>4 && i < t { 1011 // See comment in ../bytes/bytes_generic.go. 
1012 j := indexRabinKarp(s[i:], substr) 1013 if j < 0 { 1014 return -1 1015 } 1016 return i + j 1017 } 1018 } 1019 return -1 1020 } 1021 1022 func indexRabinKarp(s, substr string) int { 1023 // Rabin-Karp search 1024 hashss, pow := hashStr(substr) 1025 n := len(substr) 1026 var h uint32 1027 for i := 0; i < n; i++ { 1028 h = h*primeRK + uint32(s[i]) 1029 } 1030 if h == hashss && s[:n] == substr { 1031 return 0 1032 } 1033 for i := n; i < len(s); { 1034 h *= primeRK 1035 h += uint32(s[i]) 1036 h -= pow * uint32(s[i-n]) 1037 i++ 1038 if h == hashss && s[i-n:i] == substr { 1039 return i - n 1040 } 1041 } 1042 return -1 1043 1044 }