github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/strings/strings.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package strings implements simple functions to manipulate UTF-8 encoded strings. 6 // 7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8 package strings 9 10 import ( 11 "internal/bytealg" 12 "unicode" 13 "unicode/utf8" 14 ) 15 16 // explode splits s into a slice of UTF-8 strings, 17 // one string per Unicode character up to a maximum of n (n < 0 means no limit). 18 // Invalid UTF-8 sequences become correct encodings of U+FFFD. 19 func explode(s string, n int) []string { 20 l := utf8.RuneCountInString(s) 21 if n < 0 || n > l { 22 n = l 23 } 24 a := make([]string, n) 25 for i := 0; i < n-1; i++ { 26 ch, size := utf8.DecodeRuneInString(s) 27 a[i] = s[:size] 28 s = s[size:] 29 if ch == utf8.RuneError { 30 a[i] = string(utf8.RuneError) 31 } 32 } 33 if n > 0 { 34 a[n-1] = s 35 } 36 return a 37 } 38 39 // primeRK is the prime base used in Rabin-Karp algorithm. 40 const primeRK = 16777619 41 42 // hashStr returns the hash and the appropriate multiplicative 43 // factor for use in Rabin-Karp algorithm. 44 func hashStr(sep string) (uint32, uint32) { 45 hash := uint32(0) 46 for i := 0; i < len(sep); i++ { 47 hash = hash*primeRK + uint32(sep[i]) 48 } 49 var pow, sq uint32 = 1, primeRK 50 for i := len(sep); i > 0; i >>= 1 { 51 if i&1 != 0 { 52 pow *= sq 53 } 54 sq *= sq 55 } 56 return hash, pow 57 } 58 59 // hashStrRev returns the hash of the reverse of sep and the 60 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 
61 func hashStrRev(sep string) (uint32, uint32) { 62 hash := uint32(0) 63 for i := len(sep) - 1; i >= 0; i-- { 64 hash = hash*primeRK + uint32(sep[i]) 65 } 66 var pow, sq uint32 = 1, primeRK 67 for i := len(sep); i > 0; i >>= 1 { 68 if i&1 != 0 { 69 pow *= sq 70 } 71 sq *= sq 72 } 73 return hash, pow 74 } 75 76 // Count counts the number of non-overlapping instances of substr in s. 77 // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 78 func Count(s, substr string) int { 79 // special case 80 if len(substr) == 0 { 81 return utf8.RuneCountInString(s) + 1 82 } 83 if len(substr) == 1 { 84 return bytealg.CountString(s, substr[0]) 85 } 86 n := 0 87 for { 88 i := Index(s, substr) 89 if i == -1 { 90 return n 91 } 92 n++ 93 s = s[i+len(substr):] 94 } 95 } 96 97 // Contains reports whether substr is within s. 98 func Contains(s, substr string) bool { 99 return Index(s, substr) >= 0 100 } 101 102 // ContainsAny reports whether any Unicode code points in chars are within s. 103 func ContainsAny(s, chars string) bool { 104 return IndexAny(s, chars) >= 0 105 } 106 107 // ContainsRune reports whether the Unicode code point r is within s. 108 func ContainsRune(s string, r rune) bool { 109 return IndexRune(s, r) >= 0 110 } 111 112 // LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. 
113 func LastIndex(s, substr string) int { 114 n := len(substr) 115 switch { 116 case n == 0: 117 return len(s) 118 case n == 1: 119 return LastIndexByte(s, substr[0]) 120 case n == len(s): 121 if substr == s { 122 return 0 123 } 124 return -1 125 case n > len(s): 126 return -1 127 } 128 // Rabin-Karp search from the end of the string 129 hashss, pow := hashStrRev(substr) 130 last := len(s) - n 131 var h uint32 132 for i := len(s) - 1; i >= last; i-- { 133 h = h*primeRK + uint32(s[i]) 134 } 135 if h == hashss && s[last:] == substr { 136 return last 137 } 138 for i := last - 1; i >= 0; i-- { 139 h *= primeRK 140 h += uint32(s[i]) 141 h -= pow * uint32(s[i+n]) 142 if h == hashss && s[i:i+n] == substr { 143 return i 144 } 145 } 146 return -1 147 } 148 149 // IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s. 150 func IndexByte(s string, c byte) int { 151 return bytealg.IndexByteString(s, c) 152 } 153 154 // IndexRune returns the index of the first instance of the Unicode code point 155 // r, or -1 if rune is not present in s. 156 // If r is utf8.RuneError, it returns the first instance of any 157 // invalid UTF-8 byte sequence. 158 func IndexRune(s string, r rune) int { 159 switch { 160 case 0 <= r && r < utf8.RuneSelf: 161 return IndexByte(s, byte(r)) 162 case r == utf8.RuneError: 163 for i, r := range s { 164 if r == utf8.RuneError { 165 return i 166 } 167 } 168 return -1 169 case !utf8.ValidRune(r): 170 return -1 171 default: 172 return Index(s, string(r)) 173 } 174 } 175 176 // IndexAny returns the index of the first instance of any Unicode code point 177 // from chars in s, or -1 if no Unicode code point from chars is present in s. 178 func IndexAny(s, chars string) int { 179 if chars == "" { 180 // Avoid scanning all of s. 
181 return -1 182 } 183 if len(s) > 8 { 184 if as, isASCII := makeASCIISet(chars); isASCII { 185 for i := 0; i < len(s); i++ { 186 if as.contains(s[i]) { 187 return i 188 } 189 } 190 return -1 191 } 192 } 193 for i, c := range s { 194 for _, m := range chars { 195 if c == m { 196 return i 197 } 198 } 199 } 200 return -1 201 } 202 203 // LastIndexAny returns the index of the last instance of any Unicode code 204 // point from chars in s, or -1 if no Unicode code point from chars is 205 // present in s. 206 func LastIndexAny(s, chars string) int { 207 if chars == "" { 208 // Avoid scanning all of s. 209 return -1 210 } 211 if len(s) > 8 { 212 if as, isASCII := makeASCIISet(chars); isASCII { 213 for i := len(s) - 1; i >= 0; i-- { 214 if as.contains(s[i]) { 215 return i 216 } 217 } 218 return -1 219 } 220 } 221 for i := len(s); i > 0; { 222 r, size := utf8.DecodeLastRuneInString(s[:i]) 223 i -= size 224 for _, c := range chars { 225 if r == c { 226 return i 227 } 228 } 229 } 230 return -1 231 } 232 233 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 234 func LastIndexByte(s string, c byte) int { 235 for i := len(s) - 1; i >= 0; i-- { 236 if s[i] == c { 237 return i 238 } 239 } 240 return -1 241 } 242 243 // Generic split: splits after each instance of sep, 244 // including sepSave bytes of sep in the subarrays. 245 func genSplit(s, sep string, sepSave, n int) []string { 246 if n == 0 { 247 return nil 248 } 249 if sep == "" { 250 return explode(s, n) 251 } 252 if n < 0 { 253 n = Count(s, sep) + 1 254 } 255 256 a := make([]string, n) 257 n-- 258 i := 0 259 for i < n { 260 m := Index(s, sep) 261 if m < 0 { 262 break 263 } 264 a[i] = s[:m+sepSave] 265 s = s[m+len(sep):] 266 i++ 267 } 268 a[i] = s 269 return a[:i+1] 270 } 271 272 // SplitN slices s into substrings separated by sep and returns a slice of 273 // the substrings between those separators. 
274 // 275 // The count determines the number of substrings to return: 276 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 277 // n == 0: the result is nil (zero substrings) 278 // n < 0: all substrings 279 // 280 // Edge cases for s and sep (for example, empty strings) are handled 281 // as described in the documentation for Split. 282 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 283 284 // SplitAfterN slices s into substrings after each instance of sep and 285 // returns a slice of those substrings. 286 // 287 // The count determines the number of substrings to return: 288 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 289 // n == 0: the result is nil (zero substrings) 290 // n < 0: all substrings 291 // 292 // Edge cases for s and sep (for example, empty strings) are handled 293 // as described in the documentation for SplitAfter. 294 func SplitAfterN(s, sep string, n int) []string { 295 return genSplit(s, sep, len(sep), n) 296 } 297 298 // Split slices s into all substrings separated by sep and returns a slice of 299 // the substrings between those separators. 300 // 301 // If s does not contain sep and sep is not empty, Split returns a 302 // slice of length 1 whose only element is s. 303 // 304 // If sep is empty, Split splits after each UTF-8 sequence. If both s 305 // and sep are empty, Split returns an empty slice. 306 // 307 // It is equivalent to SplitN with a count of -1. 308 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 309 310 // SplitAfter slices s into all substrings after each instance of sep and 311 // returns a slice of those substrings. 312 // 313 // If s does not contain sep and sep is not empty, SplitAfter returns 314 // a slice of length 1 whose only element is s. 315 // 316 // If sep is empty, SplitAfter splits after each UTF-8 sequence. If 317 // both s and sep are empty, SplitAfter returns an empty slice. 
318 // 319 // It is equivalent to SplitAfterN with a count of -1. 320 func SplitAfter(s, sep string) []string { 321 return genSplit(s, sep, len(sep), -1) 322 } 323 324 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 325 326 // Fields splits the string s around each instance of one or more consecutive white space 327 // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an 328 // empty slice if s contains only white space. 329 func Fields(s string) []string { 330 // First count the fields. 331 // This is an exact count if s is ASCII, otherwise it is an approximation. 332 n := 0 333 wasSpace := 1 334 // setBits is used to track which bits are set in the bytes of s. 335 setBits := uint8(0) 336 for i := 0; i < len(s); i++ { 337 r := s[i] 338 setBits |= r 339 isSpace := int(asciiSpace[r]) 340 n += wasSpace & ^isSpace 341 wasSpace = isSpace 342 } 343 344 if setBits >= utf8.RuneSelf { 345 // Some runes in the input string are not ASCII. 346 return FieldsFunc(s, unicode.IsSpace) 347 } 348 // ASCII fast path 349 a := make([]string, n) 350 na := 0 351 fieldStart := 0 352 i := 0 353 // Skip spaces in the front of the input. 354 for i < len(s) && asciiSpace[s[i]] != 0 { 355 i++ 356 } 357 fieldStart = i 358 for i < len(s) { 359 if asciiSpace[s[i]] == 0 { 360 i++ 361 continue 362 } 363 a[na] = s[fieldStart:i] 364 na++ 365 i++ 366 // Skip spaces in between fields. 367 for i < len(s) && asciiSpace[s[i]] != 0 { 368 i++ 369 } 370 fieldStart = i 371 } 372 if fieldStart < len(s) { // Last field might end at EOF. 373 a[na] = s[fieldStart:] 374 } 375 return a 376 } 377 378 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 379 // and returns an array of slices of s. If all code points in s satisfy f(c) or the 380 // string is empty, an empty slice is returned. 381 // FieldsFunc makes no guarantees about the order in which it calls f(c). 
382 // If f does not return consistent results for a given c, FieldsFunc may crash. 383 func FieldsFunc(s string, f func(rune) bool) []string { 384 // A span is used to record a slice of s of the form s[start:end]. 385 // The start index is inclusive and the end index is exclusive. 386 type span struct { 387 start int 388 end int 389 } 390 spans := make([]span, 0, 32) 391 392 // Find the field start and end indices. 393 wasField := false 394 fromIndex := 0 395 for i, rune := range s { 396 if f(rune) { 397 if wasField { 398 spans = append(spans, span{start: fromIndex, end: i}) 399 wasField = false 400 } 401 } else { 402 if !wasField { 403 fromIndex = i 404 wasField = true 405 } 406 } 407 } 408 409 // Last field might end at EOF. 410 if wasField { 411 spans = append(spans, span{fromIndex, len(s)}) 412 } 413 414 // Create strings from recorded field indices. 415 a := make([]string, len(spans)) 416 for i, span := range spans { 417 a[i] = s[span.start:span.end] 418 } 419 420 return a 421 } 422 423 // Join concatenates the elements of a to create a single string. The separator string 424 // sep is placed between elements in the resulting string. 425 func Join(a []string, sep string) string { 426 switch len(a) { 427 case 0: 428 return "" 429 case 1: 430 return a[0] 431 } 432 n := len(sep) * (len(a) - 1) 433 for i := 0; i < len(a); i++ { 434 n += len(a[i]) 435 } 436 437 var b Builder 438 b.Grow(n) 439 b.WriteString(a[0]) 440 for _, s := range a[1:] { 441 b.WriteString(sep) 442 b.WriteString(s) 443 } 444 return b.String() 445 } 446 447 // HasPrefix tests whether the string s begins with prefix. 448 func HasPrefix(s, prefix string) bool { 449 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 450 } 451 452 // HasSuffix tests whether the string s ends with suffix. 
453 func HasSuffix(s, suffix string) bool { 454 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 455 } 456 457 // Map returns a copy of the string s with all its characters modified 458 // according to the mapping function. If mapping returns a negative value, the character is 459 // dropped from the string with no replacement. 460 func Map(mapping func(rune) rune, s string) string { 461 // In the worst case, the string can grow when mapped, making 462 // things unpleasant. But it's so rare we barge in assuming it's 463 // fine. It could also shrink but that falls out naturally. 464 465 // The output buffer b is initialized on demand, the first 466 // time a character differs. 467 var b Builder 468 469 for i, c := range s { 470 r := mapping(c) 471 if r == c && c != utf8.RuneError { 472 continue 473 } 474 475 var width int 476 if c == utf8.RuneError { 477 c, width = utf8.DecodeRuneInString(s[i:]) 478 if width != 1 && r == c { 479 continue 480 } 481 } else { 482 width = utf8.RuneLen(c) 483 } 484 485 b.Grow(len(s) + utf8.UTFMax) 486 b.WriteString(s[:i]) 487 if r >= 0 { 488 b.WriteRune(r) 489 } 490 491 s = s[i+width:] 492 break 493 } 494 495 // Fast path for unchanged input 496 if b.Cap() == 0 { // didn't call b.Grow above 497 return s 498 } 499 500 for _, c := range s { 501 r := mapping(c) 502 503 if r >= 0 { 504 // common case 505 // Due to inlining, it is more performant to determine if WriteByte should be 506 // invoked rather than always call WriteRune 507 if r < utf8.RuneSelf { 508 b.WriteByte(byte(r)) 509 } else { 510 // r is not a ASCII rune. 511 b.WriteRune(r) 512 } 513 } 514 } 515 516 return b.String() 517 } 518 519 // Repeat returns a new string consisting of count copies of the string s. 520 // 521 // It panics if count is negative or if 522 // the result of (len(s) * count) overflows. 
523 func Repeat(s string, count int) string { 524 if count == 0 { 525 return "" 526 } 527 528 // Since we cannot return an error on overflow, 529 // we should panic if the repeat will generate 530 // an overflow. 531 // See Issue golang.org/issue/16237 532 if count < 0 { 533 panic("strings: negative Repeat count") 534 } else if len(s)*count/count != len(s) { 535 panic("strings: Repeat count causes overflow") 536 } 537 538 n := len(s) * count 539 var b Builder 540 b.Grow(n) 541 b.WriteString(s) 542 for b.Len() < n { 543 if b.Len() <= n/2 { 544 b.WriteString(b.String()) 545 } else { 546 b.WriteString(b.String()[:n-b.Len()]) 547 break 548 } 549 } 550 return b.String() 551 } 552 553 // ToUpper returns s with all Unicode letters mapped to their upper case. 554 func ToUpper(s string) string { 555 isASCII, hasLower := true, false 556 for i := 0; i < len(s); i++ { 557 c := s[i] 558 if c >= utf8.RuneSelf { 559 isASCII = false 560 break 561 } 562 hasLower = hasLower || ('a' <= c && c <= 'z') 563 } 564 565 if isASCII { // optimize for ASCII-only strings. 566 if !hasLower { 567 return s 568 } 569 var b Builder 570 b.Grow(len(s)) 571 for i := 0; i < len(s); i++ { 572 c := s[i] 573 if 'a' <= c && c <= 'z' { 574 c -= 'a' - 'A' 575 } 576 b.WriteByte(c) 577 } 578 return b.String() 579 } 580 return Map(unicode.ToUpper, s) 581 } 582 583 // ToLower returns s with all Unicode letters mapped to their lower case. 584 func ToLower(s string) string { 585 isASCII, hasUpper := true, false 586 for i := 0; i < len(s); i++ { 587 c := s[i] 588 if c >= utf8.RuneSelf { 589 isASCII = false 590 break 591 } 592 hasUpper = hasUpper || ('A' <= c && c <= 'Z') 593 } 594 595 if isASCII { // optimize for ASCII-only strings. 
596 if !hasUpper { 597 return s 598 } 599 var b Builder 600 b.Grow(len(s)) 601 for i := 0; i < len(s); i++ { 602 c := s[i] 603 if 'A' <= c && c <= 'Z' { 604 c += 'a' - 'A' 605 } 606 b.WriteByte(c) 607 } 608 return b.String() 609 } 610 return Map(unicode.ToLower, s) 611 } 612 613 // ToTitle returns a copy of the string s with all Unicode letters mapped to 614 // their Unicode title case. 615 func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 616 617 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 618 // upper case using the case mapping specified by c. 619 func ToUpperSpecial(c unicode.SpecialCase, s string) string { 620 return Map(c.ToUpper, s) 621 } 622 623 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 624 // lower case using the case mapping specified by c. 625 func ToLowerSpecial(c unicode.SpecialCase, s string) string { 626 return Map(c.ToLower, s) 627 } 628 629 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 630 // Unicode title case, giving priority to the special casing rules. 631 func ToTitleSpecial(c unicode.SpecialCase, s string) string { 632 return Map(c.ToTitle, s) 633 } 634 635 // ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences 636 // replaced by the replacement string, which may be empty. 
637 func ToValidUTF8(s, replacement string) string { 638 var b Builder 639 640 for i, c := range s { 641 if c != utf8.RuneError { 642 continue 643 } 644 645 _, wid := utf8.DecodeRuneInString(s[i:]) 646 if wid == 1 { 647 b.Grow(len(s) + len(replacement)) 648 b.WriteString(s[:i]) 649 s = s[i:] 650 break 651 } 652 } 653 654 // Fast path for unchanged input 655 if b.Cap() == 0 { // didn't call b.Grow above 656 return s 657 } 658 659 invalid := false // previous byte was from an invalid UTF-8 sequence 660 for i := 0; i < len(s); { 661 c := s[i] 662 if c < utf8.RuneSelf { 663 i++ 664 invalid = false 665 b.WriteByte(c) 666 continue 667 } 668 _, wid := utf8.DecodeRuneInString(s[i:]) 669 if wid == 1 { 670 i++ 671 if !invalid { 672 invalid = true 673 b.WriteString(replacement) 674 } 675 continue 676 } 677 invalid = false 678 b.WriteString(s[i : i+wid]) 679 i += wid 680 } 681 682 return b.String() 683 } 684 685 // isSeparator reports whether the rune could mark a word boundary. 686 // TODO: update when package unicode captures more of the properties. 687 func isSeparator(r rune) bool { 688 // ASCII alphanumerics and underscore are not separators 689 if r <= 0x7F { 690 switch { 691 case '0' <= r && r <= '9': 692 return false 693 case 'a' <= r && r <= 'z': 694 return false 695 case 'A' <= r && r <= 'Z': 696 return false 697 case r == '_': 698 return false 699 } 700 return true 701 } 702 // Letters and digits are not separators 703 if unicode.IsLetter(r) || unicode.IsDigit(r) { 704 return false 705 } 706 // Otherwise, all we can do for now is treat spaces as separators. 707 return unicode.IsSpace(r) 708 } 709 710 // Title returns a copy of the string s with all Unicode letters that begin words 711 // mapped to their Unicode title case. 712 // 713 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 714 func Title(s string) string { 715 // Use a closure here to remember state. 716 // Hackish but effective. 
Depends on Map scanning in order and calling 717 // the closure once per rune. 718 prev := ' ' 719 return Map( 720 func(r rune) rune { 721 if isSeparator(prev) { 722 prev = r 723 return unicode.ToTitle(r) 724 } 725 prev = r 726 return r 727 }, 728 s) 729 } 730 731 // TrimLeftFunc returns a slice of the string s with all leading 732 // Unicode code points c satisfying f(c) removed. 733 func TrimLeftFunc(s string, f func(rune) bool) string { 734 i := indexFunc(s, f, false) 735 if i == -1 { 736 return "" 737 } 738 return s[i:] 739 } 740 741 // TrimRightFunc returns a slice of the string s with all trailing 742 // Unicode code points c satisfying f(c) removed. 743 func TrimRightFunc(s string, f func(rune) bool) string { 744 i := lastIndexFunc(s, f, false) 745 if i >= 0 && s[i] >= utf8.RuneSelf { 746 _, wid := utf8.DecodeRuneInString(s[i:]) 747 i += wid 748 } else { 749 i++ 750 } 751 return s[0:i] 752 } 753 754 // TrimFunc returns a slice of the string s with all leading 755 // and trailing Unicode code points c satisfying f(c) removed. 756 func TrimFunc(s string, f func(rune) bool) string { 757 return TrimRightFunc(TrimLeftFunc(s, f), f) 758 } 759 760 // IndexFunc returns the index into s of the first Unicode 761 // code point satisfying f(c), or -1 if none do. 762 func IndexFunc(s string, f func(rune) bool) int { 763 return indexFunc(s, f, true) 764 } 765 766 // LastIndexFunc returns the index into s of the last 767 // Unicode code point satisfying f(c), or -1 if none do. 768 func LastIndexFunc(s string, f func(rune) bool) int { 769 return lastIndexFunc(s, f, true) 770 } 771 772 // indexFunc is the same as IndexFunc except that if 773 // truth==false, the sense of the predicate function is 774 // inverted. 
func indexFunc(s string, f func(rune) bool, truth bool) int {
	for i, r := range s {
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// Decode runes from the end of s, stepping i back by each rune's size.
	for i := len(s); i > 0; {
		r, size := utf8.DecodeLastRuneInString(s[0:i])
		i -= size
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The 128-bits of the lower 16 bytes,
// starting with the least-significant bit of the lowest word to the
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		// c>>5 selects the 32-bit word, c&31 the bit within that word.
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// contains reports whether c is inside the set.
820 func (as *asciiSet) contains(c byte) bool { 821 return (as[c>>5] & (1 << uint(c&31))) != 0 822 } 823 824 func makeCutsetFunc(cutset string) func(rune) bool { 825 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 826 return func(r rune) bool { 827 return r == rune(cutset[0]) 828 } 829 } 830 if as, isASCII := makeASCIISet(cutset); isASCII { 831 return func(r rune) bool { 832 return r < utf8.RuneSelf && as.contains(byte(r)) 833 } 834 } 835 return func(r rune) bool { return IndexRune(cutset, r) >= 0 } 836 } 837 838 // Trim returns a slice of the string s with all leading and 839 // trailing Unicode code points contained in cutset removed. 840 func Trim(s string, cutset string) string { 841 if s == "" || cutset == "" { 842 return s 843 } 844 return TrimFunc(s, makeCutsetFunc(cutset)) 845 } 846 847 // TrimLeft returns a slice of the string s with all leading 848 // Unicode code points contained in cutset removed. 849 // 850 // To remove a prefix, use TrimPrefix instead. 851 func TrimLeft(s string, cutset string) string { 852 if s == "" || cutset == "" { 853 return s 854 } 855 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 856 } 857 858 // TrimRight returns a slice of the string s, with all trailing 859 // Unicode code points contained in cutset removed. 860 // 861 // To remove a suffix, use TrimSuffix instead. 862 func TrimRight(s string, cutset string) string { 863 if s == "" || cutset == "" { 864 return s 865 } 866 return TrimRightFunc(s, makeCutsetFunc(cutset)) 867 } 868 869 // TrimSpace returns a slice of the string s, with all leading 870 // and trailing white space removed, as defined by Unicode. 
871 func TrimSpace(s string) string { 872 // Fast path for ASCII: look for the first ASCII non-space byte 873 start := 0 874 for ; start < len(s); start++ { 875 c := s[start] 876 if c >= utf8.RuneSelf { 877 // If we run into a non-ASCII byte, fall back to the 878 // slower unicode-aware method on the remaining bytes 879 return TrimFunc(s[start:], unicode.IsSpace) 880 } 881 if asciiSpace[c] == 0 { 882 break 883 } 884 } 885 886 // Now look for the first ASCII non-space byte from the end 887 stop := len(s) 888 for ; stop > start; stop-- { 889 c := s[stop-1] 890 if c >= utf8.RuneSelf { 891 return TrimFunc(s[start:stop], unicode.IsSpace) 892 } 893 if asciiSpace[c] == 0 { 894 break 895 } 896 } 897 898 // At this point s[start:stop] starts and ends with an ASCII 899 // non-space bytes, so we're done. Non-ASCII cases have already 900 // been handled above. 901 return s[start:stop] 902 } 903 904 // TrimPrefix returns s without the provided leading prefix string. 905 // If s doesn't start with prefix, s is returned unchanged. 906 func TrimPrefix(s, prefix string) string { 907 if HasPrefix(s, prefix) { 908 return s[len(prefix):] 909 } 910 return s 911 } 912 913 // TrimSuffix returns s without the provided trailing suffix string. 914 // If s doesn't end with suffix, s is returned unchanged. 915 func TrimSuffix(s, suffix string) string { 916 if HasSuffix(s, suffix) { 917 return s[:len(s)-len(suffix)] 918 } 919 return s 920 } 921 922 // Replace returns a copy of the string s with the first n 923 // non-overlapping instances of old replaced by new. 924 // If old is empty, it matches at the beginning of the string 925 // and after each UTF-8 sequence, yielding up to k+1 replacements 926 // for a k-rune string. 927 // If n < 0, there is no limit on the number of replacements. 928 func Replace(s, old, new string, n int) string { 929 if old == new || n == 0 { 930 return s // avoid allocation 931 } 932 933 // Compute number of replacements. 
934 if m := Count(s, old); m == 0 { 935 return s // avoid allocation 936 } else if n < 0 || m < n { 937 n = m 938 } 939 940 // Apply replacements to buffer. 941 t := make([]byte, len(s)+n*(len(new)-len(old))) 942 w := 0 943 start := 0 944 for i := 0; i < n; i++ { 945 j := start 946 if len(old) == 0 { 947 if i > 0 { 948 _, wid := utf8.DecodeRuneInString(s[start:]) 949 j += wid 950 } 951 } else { 952 j += Index(s[start:], old) 953 } 954 w += copy(t[w:], s[start:j]) 955 w += copy(t[w:], new) 956 start = j + len(old) 957 } 958 w += copy(t[w:], s[start:]) 959 return string(t[0:w]) 960 } 961 962 // ReplaceAll returns a copy of the string s with all 963 // non-overlapping instances of old replaced by new. 964 // If old is empty, it matches at the beginning of the string 965 // and after each UTF-8 sequence, yielding up to k+1 replacements 966 // for a k-rune string. 967 func ReplaceAll(s, old, new string) string { 968 return Replace(s, old, new, -1) 969 } 970 971 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 972 // are equal under Unicode case-folding. 973 func EqualFold(s, t string) bool { 974 for s != "" && t != "" { 975 // Extract first rune from each string. 976 var sr, tr rune 977 if s[0] < utf8.RuneSelf { 978 sr, s = rune(s[0]), s[1:] 979 } else { 980 r, size := utf8.DecodeRuneInString(s) 981 sr, s = r, s[size:] 982 } 983 if t[0] < utf8.RuneSelf { 984 tr, t = rune(t[0]), t[1:] 985 } else { 986 r, size := utf8.DecodeRuneInString(t) 987 tr, t = r, t[size:] 988 } 989 990 // If they match, keep going; if not, return false. 991 992 // Easy case. 993 if tr == sr { 994 continue 995 } 996 997 // Make sr < tr to simplify what follows. 998 if tr < sr { 999 tr, sr = sr, tr 1000 } 1001 // Fast check for ASCII. 1002 if tr < utf8.RuneSelf { 1003 // ASCII only, sr/tr must be upper/lower case 1004 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 1005 continue 1006 } 1007 return false 1008 } 1009 1010 // General case. 
SimpleFold(x) returns the next equivalent rune > x 1011 // or wraps around to smaller values. 1012 r := unicode.SimpleFold(sr) 1013 for r != sr && r < tr { 1014 r = unicode.SimpleFold(r) 1015 } 1016 if r == tr { 1017 continue 1018 } 1019 return false 1020 } 1021 1022 // One string is empty. Are both? 1023 return s == t 1024 } 1025 1026 // Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. 1027 func Index(s, substr string) int { 1028 n := len(substr) 1029 switch { 1030 case n == 0: 1031 return 0 1032 case n == 1: 1033 return IndexByte(s, substr[0]) 1034 case n == len(s): 1035 if substr == s { 1036 return 0 1037 } 1038 return -1 1039 case n > len(s): 1040 return -1 1041 case n <= bytealg.MaxLen: 1042 // Use brute force when s and substr both are small 1043 if len(s) <= bytealg.MaxBruteForce { 1044 return bytealg.IndexString(s, substr) 1045 } 1046 c0 := substr[0] 1047 c1 := substr[1] 1048 i := 0 1049 t := len(s) - n + 1 1050 fails := 0 1051 for i < t { 1052 if s[i] != c0 { 1053 // IndexByte is faster than bytealg.IndexString, so use it as long as 1054 // we're not getting lots of false positives. 1055 o := IndexByte(s[i:t], c0) 1056 if o < 0 { 1057 return -1 1058 } 1059 i += o 1060 } 1061 if s[i+1] == c1 && s[i:i+n] == substr { 1062 return i 1063 } 1064 fails++ 1065 i++ 1066 // Switch to bytealg.IndexString when IndexByte produces too many false positives. 1067 if fails > bytealg.Cutover(i) { 1068 r := bytealg.IndexString(s[i:], substr) 1069 if r >= 0 { 1070 return r + i 1071 } 1072 return -1 1073 } 1074 } 1075 return -1 1076 } 1077 c0 := substr[0] 1078 c1 := substr[1] 1079 i := 0 1080 t := len(s) - n + 1 1081 fails := 0 1082 for i < t { 1083 if s[i] != c0 { 1084 o := IndexByte(s[i:t], c0) 1085 if o < 0 { 1086 return -1 1087 } 1088 i += o 1089 } 1090 if s[i+1] == c1 && s[i:i+n] == substr { 1091 return i 1092 } 1093 i++ 1094 fails++ 1095 if fails >= 4+i>>4 && i < t { 1096 // See comment in ../bytes/bytes_generic.go. 
1097 j := indexRabinKarp(s[i:], substr) 1098 if j < 0 { 1099 return -1 1100 } 1101 return i + j 1102 } 1103 } 1104 return -1 1105 } 1106 1107 func indexRabinKarp(s, substr string) int { 1108 // Rabin-Karp search 1109 hashss, pow := hashStr(substr) 1110 n := len(substr) 1111 var h uint32 1112 for i := 0; i < n; i++ { 1113 h = h*primeRK + uint32(s[i]) 1114 } 1115 if h == hashss && s[:n] == substr { 1116 return 0 1117 } 1118 for i := n; i < len(s); { 1119 h *= primeRK 1120 h += uint32(s[i]) 1121 h -= pow * uint32(s[i-n]) 1122 i++ 1123 if h == hashss && s[i-n:i] == substr { 1124 return i - n 1125 } 1126 } 1127 return -1 1128 }