github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/strings/strings.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package strings implements simple functions to manipulate UTF-8 encoded strings. 6 // 7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8 package strings 9 10 import ( 11 "internal/bytealg" 12 "unicode" 13 "unicode/utf8" 14 ) 15 16 const maxInt = int(^uint(0) >> 1) 17 18 // explode splits s into a slice of UTF-8 strings, 19 // one string per Unicode character up to a maximum of n (n < 0 means no limit). 20 // Invalid UTF-8 bytes are sliced individually. 21 func explode(s string, n int) []string { 22 l := utf8.RuneCountInString(s) 23 if n < 0 || n > l { 24 n = l 25 } 26 a := make([]string, n) 27 for i := 0; i < n-1; i++ { 28 _, size := utf8.DecodeRuneInString(s) 29 a[i] = s[:size] 30 s = s[size:] 31 } 32 if n > 0 { 33 a[n-1] = s 34 } 35 return a 36 } 37 38 // Count counts the number of non-overlapping instances of substr in s. 39 // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 40 func Count(s, substr string) int { 41 // special case 42 if len(substr) == 0 { 43 return utf8.RuneCountInString(s) + 1 44 } 45 if len(substr) == 1 { 46 return bytealg.CountString(s, substr[0]) 47 } 48 n := 0 49 for { 50 i := Index(s, substr) 51 if i == -1 { 52 return n 53 } 54 n++ 55 s = s[i+len(substr):] 56 } 57 } 58 59 // Contains reports whether substr is within s. 60 func Contains(s, substr string) bool { 61 return Index(s, substr) >= 0 62 } 63 64 // ContainsAny reports whether any Unicode code points in chars are within s. 65 func ContainsAny(s, chars string) bool { 66 return IndexAny(s, chars) >= 0 67 } 68 69 // ContainsRune reports whether the Unicode code point r is within s. 
func ContainsRune(s string, r rune) bool {
	// IndexRune yields -1 when r is absent, so any non-negative index means "found".
	return IndexRune(s, r) >= 0
}

// ContainsFunc reports whether any Unicode code points r within s satisfy f(r).
func ContainsFunc(s string, f func(rune) bool) bool {
	return IndexFunc(s, f) >= 0
}

// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
//
// An empty substr matches at the very end, so the result is len(s).
func LastIndex(s, substr string) int {
	n := len(substr)
	switch {
	case n == 0:
		return len(s)
	case n == 1:
		// Single byte: use the dedicated byte search.
		return bytealg.LastIndexByteString(s, substr[0])
	case n == len(s):
		// Same length: the only possible match is the whole string.
		if substr == s {
			return 0
		}
		return -1
	case n > len(s):
		return -1
	}
	// Rabin-Karp search from the end of the string
	// NOTE(review): HashStrRev presumably returns the reversed hash of substr
	// together with the multiplier needed to drop the outgoing byte - confirm
	// against internal/bytealg.
	hashss, pow := bytealg.HashStrRev(substr)
	last := len(s) - n
	// h is the rolling hash of the n-byte window currently under test,
	// accumulated from the last byte of the window towards its first.
	var h uint32
	for i := len(s) - 1; i >= last; i-- {
		h = h*bytealg.PrimeRK + uint32(s[i])
	}
	// Equal hashes are only a hint; the string comparison confirms the match.
	if h == hashss && s[last:] == substr {
		return last
	}
	for i := last - 1; i >= 0; i-- {
		// Slide the window one byte to the left: bring in s[i] and
		// remove the contribution of s[i+n], which just left the window.
		h *= bytealg.PrimeRK
		h += uint32(s[i])
		h -= pow * uint32(s[i+n])
		if h == hashss && s[i:i+n] == substr {
			return i
		}
	}
	return -1
}

// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
func IndexByte(s string, c byte) int {
	return bytealg.IndexByteString(s, c)
}

// IndexRune returns the index of the first instance of the Unicode code point
// r, or -1 if rune is not present in s.
// If r is utf8.RuneError, it returns the first instance of any
// invalid UTF-8 byte sequence.
125 func IndexRune(s string, r rune) int { 126 switch { 127 case 0 <= r && r < utf8.RuneSelf: 128 return IndexByte(s, byte(r)) 129 case r == utf8.RuneError: 130 for i, r := range s { 131 if r == utf8.RuneError { 132 return i 133 } 134 } 135 return -1 136 case !utf8.ValidRune(r): 137 return -1 138 default: 139 return Index(s, string(r)) 140 } 141 } 142 143 // IndexAny returns the index of the first instance of any Unicode code point 144 // from chars in s, or -1 if no Unicode code point from chars is present in s. 145 func IndexAny(s, chars string) int { 146 if chars == "" { 147 // Avoid scanning all of s. 148 return -1 149 } 150 if len(chars) == 1 { 151 // Avoid scanning all of s. 152 r := rune(chars[0]) 153 if r >= utf8.RuneSelf { 154 r = utf8.RuneError 155 } 156 return IndexRune(s, r) 157 } 158 if len(s) > 8 { 159 if as, isASCII := makeASCIISet(chars); isASCII { 160 for i := 0; i < len(s); i++ { 161 if as.contains(s[i]) { 162 return i 163 } 164 } 165 return -1 166 } 167 } 168 for i, c := range s { 169 if IndexRune(chars, c) >= 0 { 170 return i 171 } 172 } 173 return -1 174 } 175 176 // LastIndexAny returns the index of the last instance of any Unicode code 177 // point from chars in s, or -1 if no Unicode code point from chars is 178 // present in s. 179 func LastIndexAny(s, chars string) int { 180 if chars == "" { 181 // Avoid scanning all of s. 
182 return -1 183 } 184 if len(s) == 1 { 185 rc := rune(s[0]) 186 if rc >= utf8.RuneSelf { 187 rc = utf8.RuneError 188 } 189 if IndexRune(chars, rc) >= 0 { 190 return 0 191 } 192 return -1 193 } 194 if len(s) > 8 { 195 if as, isASCII := makeASCIISet(chars); isASCII { 196 for i := len(s) - 1; i >= 0; i-- { 197 if as.contains(s[i]) { 198 return i 199 } 200 } 201 return -1 202 } 203 } 204 if len(chars) == 1 { 205 rc := rune(chars[0]) 206 if rc >= utf8.RuneSelf { 207 rc = utf8.RuneError 208 } 209 for i := len(s); i > 0; { 210 r, size := utf8.DecodeLastRuneInString(s[:i]) 211 i -= size 212 if rc == r { 213 return i 214 } 215 } 216 return -1 217 } 218 for i := len(s); i > 0; { 219 r, size := utf8.DecodeLastRuneInString(s[:i]) 220 i -= size 221 if IndexRune(chars, r) >= 0 { 222 return i 223 } 224 } 225 return -1 226 } 227 228 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 229 func LastIndexByte(s string, c byte) int { 230 return bytealg.LastIndexByteString(s, c) 231 } 232 233 // Generic split: splits after each instance of sep, 234 // including sepSave bytes of sep in the subarrays. 235 func genSplit(s, sep string, sepSave, n int) []string { 236 if n == 0 { 237 return nil 238 } 239 if sep == "" { 240 return explode(s, n) 241 } 242 if n < 0 { 243 n = Count(s, sep) + 1 244 } 245 246 if n > len(s)+1 { 247 n = len(s) + 1 248 } 249 a := make([]string, n) 250 n-- 251 i := 0 252 for i < n { 253 m := Index(s, sep) 254 if m < 0 { 255 break 256 } 257 a[i] = s[:m+sepSave] 258 s = s[m+len(sep):] 259 i++ 260 } 261 a[i] = s 262 return a[:i+1] 263 } 264 265 // SplitN slices s into substrings separated by sep and returns a slice of 266 // the substrings between those separators. 267 // 268 // The count determines the number of substrings to return: 269 // 270 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 
271 // n == 0: the result is nil (zero substrings) 272 // n < 0: all substrings 273 // 274 // Edge cases for s and sep (for example, empty strings) are handled 275 // as described in the documentation for [Split]. 276 // 277 // To split around the first instance of a separator, see Cut. 278 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 279 280 // SplitAfterN slices s into substrings after each instance of sep and 281 // returns a slice of those substrings. 282 // 283 // The count determines the number of substrings to return: 284 // 285 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 286 // n == 0: the result is nil (zero substrings) 287 // n < 0: all substrings 288 // 289 // Edge cases for s and sep (for example, empty strings) are handled 290 // as described in the documentation for SplitAfter. 291 func SplitAfterN(s, sep string, n int) []string { 292 return genSplit(s, sep, len(sep), n) 293 } 294 295 // Split slices s into all substrings separated by sep and returns a slice of 296 // the substrings between those separators. 297 // 298 // If s does not contain sep and sep is not empty, Split returns a 299 // slice of length 1 whose only element is s. 300 // 301 // If sep is empty, Split splits after each UTF-8 sequence. If both s 302 // and sep are empty, Split returns an empty slice. 303 // 304 // It is equivalent to [SplitN] with a count of -1. 305 // 306 // To split around the first instance of a separator, see Cut. 307 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 308 309 // SplitAfter slices s into all substrings after each instance of sep and 310 // returns a slice of those substrings. 311 // 312 // If s does not contain sep and sep is not empty, SplitAfter returns 313 // a slice of length 1 whose only element is s. 314 // 315 // If sep is empty, SplitAfter splits after each UTF-8 sequence. If 316 // both s and sep are empty, SplitAfter returns an empty slice. 
//
// It is equivalent to [SplitAfterN] with a count of -1.
func SplitAfter(s, sep string) []string {
	return genSplit(s, sep, len(sep), -1)
}

// asciiSpace is a lookup table indexed by byte value: the entry is 1 for the
// six ASCII white space characters listed below and 0 for every other byte.
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}

// Fields splits the string s around each instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
// empty slice if s contains only white space.
func Fields(s string) []string {
	// First count the fields.
	// This is an exact count if s is ASCII, otherwise it is an approximation.
	n := 0
	// wasSpace starts at 1 so that a field beginning at offset 0 is counted.
	wasSpace := 1
	// setBits is used to track which bits are set in the bytes of s.
	setBits := uint8(0)
	for i := 0; i < len(s); i++ {
		r := s[i]
		setBits |= r
		isSpace := int(asciiSpace[r])
		// Adds 1 exactly when the previous byte was a space and this one
		// is not, i.e. at the first byte of each field.
		n += wasSpace & ^isSpace
		wasSpace = isSpace
	}

	// The high bit ends up set in setBits if any byte of s is >= utf8.RuneSelf.
	if setBits >= utf8.RuneSelf {
		// Some runes in the input string are not ASCII.
		return FieldsFunc(s, unicode.IsSpace)
	}
	// ASCII fast path
	a := make([]string, n)
	na := 0
	fieldStart := 0
	i := 0
	// Skip spaces in the front of the input.
	for i < len(s) && asciiSpace[s[i]] != 0 {
		i++
	}
	fieldStart = i
	for i < len(s) {
		if asciiSpace[s[i]] == 0 {
			i++
			continue
		}
		// s[i] is the first space after a field: record the field.
		a[na] = s[fieldStart:i]
		na++
		i++
		// Skip spaces in between fields.
		for i < len(s) && asciiSpace[s[i]] != 0 {
			i++
		}
		fieldStart = i
	}
	if fieldStart < len(s) { // Last field might end at EOF.
		a[na] = s[fieldStart:]
	}
	return a
}

// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. If all code points in s satisfy f(c) or the
// string is empty, an empty slice is returned.
//
// FieldsFunc makes no guarantees about the order in which it calls f(c)
// and assumes that f always returns the same value for a given c.
func FieldsFunc(s string, f func(rune) bool) []string {
	// bounds is the half-open byte range [from, to) of one field within s.
	type bounds struct {
		from, to int
	}
	found := make([]bounds, 0, 32)

	// The field boundaries are collected in a first pass and turned into
	// substrings in a second one; the original authors measured this to be
	// noticeably faster than slicing s while scanning.
	//
	// open is the offset at which the current field began; any negative
	// value means that no field is open.
	open := -1
	for pos, r := range s {
		isSep := f(r)
		switch {
		case isSep && open >= 0:
			found = append(found, bounds{from: open, to: pos})
			// Flip to a negative value. The complement (rather than a
			// literal -1) is kept from the original, where it was
			// measurably faster on amd64.
			open = ^open
		case !isSep && open < 0:
			open = pos
		}
	}

	// A field that is still open runs to the end of s.
	if open >= 0 {
		found = append(found, bounds{from: open, to: len(s)})
	}

	// Materialize the substrings from the recorded ranges.
	fields := make([]string, len(found))
	for i := range found {
		fields[i] = s[found[i].from:found[i].to]
	}
	return fields
}

// Join concatenates the elements of its first argument to create a single string. The separator
// string sep is placed between elements in the resulting string.
func Join(elems []string, sep string) string {
	switch len(elems) {
	case 0:
		return ""
	case 1:
		// A single element is returned as is; no separator is involved.
		return elems[0]
	}

	// n accumulates the exact byte length of the result so that the
	// builder can be sized once. Both additions are guarded against
	// exceeding maxInt.
	var n int
	if len(sep) > 0 {
		if len(sep) >= maxInt/(len(elems)-1) {
			panic("strings: Join output length overflow")
		}
		n += len(sep) * (len(elems) - 1)
	}
	for _, elem := range elems {
		if len(elem) > maxInt-n {
			panic("strings: Join output length overflow")
		}
		n += len(elem)
	}

	var b Builder
	b.Grow(n)
	b.WriteString(elems[0])
	for _, s := range elems[1:] {
		b.WriteString(sep)
		b.WriteString(s)
	}
	return b.String()
}

// HasPrefix reports whether the string s begins with prefix.
func HasPrefix(s, prefix string) bool {
	// The length check keeps the slice expression in range.
	return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
}

// HasSuffix reports whether the string s ends with suffix.
func HasSuffix(s, suffix string) bool {
	// The length check keeps the slice expression in range.
	return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
}

// Map returns a copy of the string s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.
//
// If mapping leaves every character unchanged, s itself is returned.
func Map(mapping func(rune) rune, s string) string {
	// In the worst case, the string can grow when mapped, making
	// things unpleasant. But it's so rare we barge in assuming it's
	// fine. It could also shrink but that falls out naturally.

	// The output buffer b is initialized on demand, the first
	// time a character differs.
	var b Builder

	// First pass: look for the first character whose mapping changes the
	// output. Nothing is written until one is found.
	for i, c := range s {
		r := mapping(c)
		if r == c && c != utf8.RuneError {
			continue
		}

		var width int
		if c == utf8.RuneError {
			// RuneError from range is ambiguous: decode again to tell an
			// invalid byte (width 1) from a properly encoded U+FFFD.
			c, width = utf8.DecodeRuneInString(s[i:])
			if width != 1 && r == c {
				// A valid U+FFFD mapped to itself: still unchanged.
				continue
			}
		} else {
			width = utf8.RuneLen(c)
		}

		// Copy the unchanged prefix, emit the mapped rune (unless it is
		// dropped) and leave the rest of s for the second pass.
		b.Grow(len(s) + utf8.UTFMax)
		b.WriteString(s[:i])
		if r >= 0 {
			b.WriteRune(r)
		}

		s = s[i+width:]
		break
	}

	// Fast path for unchanged input
	if b.Cap() == 0 { // didn't call b.Grow above
		return s
	}

	// Second pass: map every remaining character into the buffer.
	for _, c := range s {
		r := mapping(c)

		if r >= 0 {
			// common case
			// Due to inlining, it is more performant to determine if WriteByte should be
			// invoked rather than always call WriteRune
			if r < utf8.RuneSelf {
				b.WriteByte(byte(r))
			} else {
				// r is not an ASCII rune.
				b.WriteRune(r)
			}
		}
	}

	return b.String()
}

// According to static analysis, spaces, dashes, zeros, equals, and tabs
// are the most commonly repeated string literals,
// often used for display on fixed-width terminal windows.
// Pre-declare constants for these for O(1) repetition in the common-case.
//
// NOTE(review): as written here repeatedSpaces is only two bytes long while
// the dash, equals and tab tables span two full rows; the run of spaces may
// have been collapsed when this text was extracted - confirm against the
// upstream file. Repeat only slices these tables when the requested length
// fits, so a short table is safe either way.
const (
	repeatedSpaces = "" +
		" " +
		" "
	repeatedDashes = "" +
		"----------------------------------------------------------------" +
		"----------------------------------------------------------------"
	repeatedZeroes = "" +
		"0000000000000000000000000000000000000000000000000000000000000000"
	repeatedEquals = "" +
		"================================================================" +
		"================================================================"
	repeatedTabs = "" +
		"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" +
		"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)

// Repeat returns a new string consisting of count copies of the string s.
//
// It panics if count is negative or if the result of (len(s) * count)
// overflows.
func Repeat(s string, count int) string {
	switch count {
	case 0:
		return ""
	case 1:
		return s
	}

	// Since we cannot return an error on overflow,
	// we should panic if the repeat will generate an overflow.
	// See golang.org/issue/16237.
	if count < 0 {
		panic("strings: negative Repeat count")
	}
	if len(s) >= maxInt/count {
		panic("strings: Repeat output length overflow")
	}
	// n is the byte length of the result.
	n := len(s) * count

	if len(s) == 0 {
		return ""
	}

	// Optimize for commonly repeated strings of relatively short length.
	// When s is a prefix of one of the pre-declared tables and the result
	// fits in that table, the result is just a slice of it.
	switch s[0] {
	case ' ', '-', '0', '=', '\t':
		switch {
		case n <= len(repeatedSpaces) && HasPrefix(repeatedSpaces, s):
			return repeatedSpaces[:n]
		case n <= len(repeatedDashes) && HasPrefix(repeatedDashes, s):
			return repeatedDashes[:n]
		case n <= len(repeatedZeroes) && HasPrefix(repeatedZeroes, s):
			return repeatedZeroes[:n]
		case n <= len(repeatedEquals) && HasPrefix(repeatedEquals, s):
			return repeatedEquals[:n]
		case n <= len(repeatedTabs) && HasPrefix(repeatedTabs, s):
			return repeatedTabs[:n]
		}
	}

	// Past a certain chunk size it is counterproductive to use
	// larger chunks as the source of the write, as when the source
	// is too large we are basically just thrashing the CPU D-cache.
	// So if the result length is larger than an empirically-found
	// limit (8KB), we stop growing the source string once the limit
	// is reached and keep reusing the same source string - that
	// should therefore be always resident in the L1 cache - until we
	// have completed the construction of the result.
	// This yields significant speedups (up to +100%) in cases where
	// the result length is large (roughly, over L2 cache size).
	const chunkLimit = 8 * 1024
	chunkMax := n
	if n > chunkLimit {
		// Largest multiple of len(s) that fits in chunkLimit, or a
		// single copy of s when s alone is longer than the limit.
		chunkMax = chunkLimit / len(s) * len(s)
		if chunkMax == 0 {
			chunkMax = len(s)
		}
	}

	var b Builder
	b.Grow(n)
	b.WriteString(s)
	// Keep appending a prefix of what has been written so far: the copied
	// amount doubles each round until it is capped by chunkMax or by the
	// number of bytes still missing.
	for b.Len() < n {
		chunk := n - b.Len()
		if chunk > b.Len() {
			chunk = b.Len()
		}
		if chunk > chunkMax {
			chunk = chunkMax
		}
		b.WriteString(b.String()[:chunk])
	}
	return b.String()
}

// ToUpper returns s with all Unicode letters mapped to their upper case.
func ToUpper(s string) string {
	// Scan once to learn whether s is pure ASCII and whether it has any
	// lower-case ASCII letter at all.
	isASCII, hasLower := true, false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= utf8.RuneSelf {
			isASCII = false
			break
		}
		hasLower = hasLower || ('a' <= c && c <= 'z')
	}

	if isASCII { // optimize for ASCII-only strings.
		if !hasLower {
			// Nothing to change: return s without copying.
			return s
		}
		var (
			b   Builder
			pos int // start of the not-yet-copied, unchanged run of s
		)
		b.Grow(len(s))
		for i := 0; i < len(s); i++ {
			c := s[i]
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A'
				// Flush the unchanged bytes before the converted one.
				if pos < i {
					b.WriteString(s[pos:i])
				}
				b.WriteByte(c)
				pos = i + 1
			}
		}
		if pos < len(s) {
			b.WriteString(s[pos:])
		}
		return b.String()
	}
	return Map(unicode.ToUpper, s)
}

// ToLower returns s with all Unicode letters mapped to their lower case.
func ToLower(s string) string {
	// Scan once to learn whether s is pure ASCII and whether it has any
	// upper-case ASCII letter at all.
	isASCII, hasUpper := true, false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= utf8.RuneSelf {
			isASCII = false
			break
		}
		hasUpper = hasUpper || ('A' <= c && c <= 'Z')
	}

	if isASCII { // optimize for ASCII-only strings.
		if !hasUpper {
			// Nothing to change: return s without copying.
			return s
		}
		var (
			b   Builder
			pos int // start of the not-yet-copied, unchanged run of s
		)
		b.Grow(len(s))
		for i := 0; i < len(s); i++ {
			c := s[i]
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A'
				// Flush the unchanged bytes before the converted one.
				if pos < i {
					b.WriteString(s[pos:i])
				}
				b.WriteByte(c)
				pos = i + 1
			}
		}
		if pos < len(s) {
			b.WriteString(s[pos:])
		}
		return b.String()
	}
	return Map(unicode.ToLower, s)
}

// ToTitle returns a copy of the string s with all Unicode letters mapped to
// their Unicode title case.
func ToTitle(s string) string { return Map(unicode.ToTitle, s) }

// ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
// upper case using the case mapping specified by c.
func ToUpperSpecial(c unicode.SpecialCase, s string) string {
	return Map(c.ToUpper, s)
}

// ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
// lower case using the case mapping specified by c.
func ToLowerSpecial(c unicode.SpecialCase, s string) string {
	return Map(c.ToLower, s)
}

// ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
// Unicode title case, giving priority to the special casing rules.
func ToTitleSpecial(c unicode.SpecialCase, s string) string {
	return Map(c.ToTitle, s)
}

// ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences
// replaced by the replacement string, which may be empty.
737 func ToValidUTF8(s, replacement string) string { 738 var b Builder 739 740 for i, c := range s { 741 if c != utf8.RuneError { 742 continue 743 } 744 745 _, wid := utf8.DecodeRuneInString(s[i:]) 746 if wid == 1 { 747 b.Grow(len(s) + len(replacement)) 748 b.WriteString(s[:i]) 749 s = s[i:] 750 break 751 } 752 } 753 754 // Fast path for unchanged input 755 if b.Cap() == 0 { // didn't call b.Grow above 756 return s 757 } 758 759 invalid := false // previous byte was from an invalid UTF-8 sequence 760 for i := 0; i < len(s); { 761 c := s[i] 762 if c < utf8.RuneSelf { 763 i++ 764 invalid = false 765 b.WriteByte(c) 766 continue 767 } 768 _, wid := utf8.DecodeRuneInString(s[i:]) 769 if wid == 1 { 770 i++ 771 if !invalid { 772 invalid = true 773 b.WriteString(replacement) 774 } 775 continue 776 } 777 invalid = false 778 b.WriteString(s[i : i+wid]) 779 i += wid 780 } 781 782 return b.String() 783 } 784 785 // isSeparator reports whether the rune could mark a word boundary. 786 // TODO: update when package unicode captures more of the properties. 787 func isSeparator(r rune) bool { 788 // ASCII alphanumerics and underscore are not separators 789 if r <= 0x7F { 790 switch { 791 case '0' <= r && r <= '9': 792 return false 793 case 'a' <= r && r <= 'z': 794 return false 795 case 'A' <= r && r <= 'Z': 796 return false 797 case r == '_': 798 return false 799 } 800 return true 801 } 802 // Letters and digits are not separators 803 if unicode.IsLetter(r) || unicode.IsDigit(r) { 804 return false 805 } 806 // Otherwise, all we can do for now is treat spaces as separators. 807 return unicode.IsSpace(r) 808 } 809 810 // Title returns a copy of the string s with all Unicode letters that begin words 811 // mapped to their Unicode title case. 812 // 813 // Deprecated: The rule Title uses for word boundaries does not handle Unicode 814 // punctuation properly. Use golang.org/x/text/cases instead. 815 func Title(s string) string { 816 // Use a closure here to remember state. 
817 // Hackish but effective. Depends on Map scanning in order and calling 818 // the closure once per rune. 819 prev := ' ' 820 return Map( 821 func(r rune) rune { 822 if isSeparator(prev) { 823 prev = r 824 return unicode.ToTitle(r) 825 } 826 prev = r 827 return r 828 }, 829 s) 830 } 831 832 // TrimLeftFunc returns a slice of the string s with all leading 833 // Unicode code points c satisfying f(c) removed. 834 func TrimLeftFunc(s string, f func(rune) bool) string { 835 i := indexFunc(s, f, false) 836 if i == -1 { 837 return "" 838 } 839 return s[i:] 840 } 841 842 // TrimRightFunc returns a slice of the string s with all trailing 843 // Unicode code points c satisfying f(c) removed. 844 func TrimRightFunc(s string, f func(rune) bool) string { 845 i := lastIndexFunc(s, f, false) 846 if i >= 0 && s[i] >= utf8.RuneSelf { 847 _, wid := utf8.DecodeRuneInString(s[i:]) 848 i += wid 849 } else { 850 i++ 851 } 852 return s[0:i] 853 } 854 855 // TrimFunc returns a slice of the string s with all leading 856 // and trailing Unicode code points c satisfying f(c) removed. 857 func TrimFunc(s string, f func(rune) bool) string { 858 return TrimRightFunc(TrimLeftFunc(s, f), f) 859 } 860 861 // IndexFunc returns the index into s of the first Unicode 862 // code point satisfying f(c), or -1 if none do. 863 func IndexFunc(s string, f func(rune) bool) int { 864 return indexFunc(s, f, true) 865 } 866 867 // LastIndexFunc returns the index into s of the last 868 // Unicode code point satisfying f(c), or -1 if none do. 869 func LastIndexFunc(s string, f func(rune) bool) int { 870 return lastIndexFunc(s, f, true) 871 } 872 873 // indexFunc is the same as IndexFunc except that if 874 // truth==false, the sense of the predicate function is 875 // inverted. 
func indexFunc(s string, f func(rune) bool, truth bool) int {
	for pos, r := range s {
		if f(r) != truth {
			continue
		}
		return pos
	}
	return -1
}

// lastIndexFunc behaves like LastIndexFunc, but when truth is false it
// looks for the last code point that does NOT satisfy f.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// rest is the still unexamined prefix of s; its length is always the
	// offset of the code point that was decoded last.
	for rest := s; len(rest) > 0; {
		r, width := utf8.DecodeLastRuneInString(rest)
		rest = rest[:len(rest)-width]
		if f(r) == truth {
			return len(rest)
		}
	}
	return -1
}

// asciiSet is a 32-byte bitmap with one bit per possible byte value. Only
// the low 16 bytes (bits 0 through 127, least-significant bit of the first
// word first) are ever set, one per ASCII character in the set. The high 16
// bytes stay zero, so a non-ASCII byte is always reported as absent. The
// unused half is kept so that asciiSet.contains needs no bounds checks.
type asciiSet [8]uint32

// makeASCIISet builds the set of characters in chars. ok is false as soon as
// a non-ASCII byte is met, in which case the set is incomplete.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		b := chars[i]
		if b >= utf8.RuneSelf {
			return as, false
		}
		word, bit := b>>5, b&31
		as[word] |= uint32(1) << bit
	}
	return as, true
}

// contains reports whether c is a member of the set.
func (as *asciiSet) contains(c byte) bool {
	mask := uint32(1) << (c & 31)
	return as[c>>5]&mask != 0
}

// Trim returns a slice of the string s with all leading and
// trailing Unicode code points contained in cutset removed.
929 func Trim(s, cutset string) string { 930 if s == "" || cutset == "" { 931 return s 932 } 933 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 934 return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0]) 935 } 936 if as, ok := makeASCIISet(cutset); ok { 937 return trimLeftASCII(trimRightASCII(s, &as), &as) 938 } 939 return trimLeftUnicode(trimRightUnicode(s, cutset), cutset) 940 } 941 942 // TrimLeft returns a slice of the string s with all leading 943 // Unicode code points contained in cutset removed. 944 // 945 // To remove a prefix, use [TrimPrefix] instead. 946 func TrimLeft(s, cutset string) string { 947 if s == "" || cutset == "" { 948 return s 949 } 950 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 951 return trimLeftByte(s, cutset[0]) 952 } 953 if as, ok := makeASCIISet(cutset); ok { 954 return trimLeftASCII(s, &as) 955 } 956 return trimLeftUnicode(s, cutset) 957 } 958 959 func trimLeftByte(s string, c byte) string { 960 for len(s) > 0 && s[0] == c { 961 s = s[1:] 962 } 963 return s 964 } 965 966 func trimLeftASCII(s string, as *asciiSet) string { 967 for len(s) > 0 { 968 if !as.contains(s[0]) { 969 break 970 } 971 s = s[1:] 972 } 973 return s 974 } 975 976 func trimLeftUnicode(s, cutset string) string { 977 for len(s) > 0 { 978 r, n := rune(s[0]), 1 979 if r >= utf8.RuneSelf { 980 r, n = utf8.DecodeRuneInString(s) 981 } 982 if !ContainsRune(cutset, r) { 983 break 984 } 985 s = s[n:] 986 } 987 return s 988 } 989 990 // TrimRight returns a slice of the string s, with all trailing 991 // Unicode code points contained in cutset removed. 992 // 993 // To remove a suffix, use [TrimSuffix] instead. 
994 func TrimRight(s, cutset string) string { 995 if s == "" || cutset == "" { 996 return s 997 } 998 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 999 return trimRightByte(s, cutset[0]) 1000 } 1001 if as, ok := makeASCIISet(cutset); ok { 1002 return trimRightASCII(s, &as) 1003 } 1004 return trimRightUnicode(s, cutset) 1005 } 1006 1007 func trimRightByte(s string, c byte) string { 1008 for len(s) > 0 && s[len(s)-1] == c { 1009 s = s[:len(s)-1] 1010 } 1011 return s 1012 } 1013 1014 func trimRightASCII(s string, as *asciiSet) string { 1015 for len(s) > 0 { 1016 if !as.contains(s[len(s)-1]) { 1017 break 1018 } 1019 s = s[:len(s)-1] 1020 } 1021 return s 1022 } 1023 1024 func trimRightUnicode(s, cutset string) string { 1025 for len(s) > 0 { 1026 r, n := rune(s[len(s)-1]), 1 1027 if r >= utf8.RuneSelf { 1028 r, n = utf8.DecodeLastRuneInString(s) 1029 } 1030 if !ContainsRune(cutset, r) { 1031 break 1032 } 1033 s = s[:len(s)-n] 1034 } 1035 return s 1036 } 1037 1038 // TrimSpace returns a slice of the string s, with all leading 1039 // and trailing white space removed, as defined by Unicode. 
1040 func TrimSpace(s string) string { 1041 // Fast path for ASCII: look for the first ASCII non-space byte 1042 start := 0 1043 for ; start < len(s); start++ { 1044 c := s[start] 1045 if c >= utf8.RuneSelf { 1046 // If we run into a non-ASCII byte, fall back to the 1047 // slower unicode-aware method on the remaining bytes 1048 return TrimFunc(s[start:], unicode.IsSpace) 1049 } 1050 if asciiSpace[c] == 0 { 1051 break 1052 } 1053 } 1054 1055 // Now look for the first ASCII non-space byte from the end 1056 stop := len(s) 1057 for ; stop > start; stop-- { 1058 c := s[stop-1] 1059 if c >= utf8.RuneSelf { 1060 // start has been already trimmed above, should trim end only 1061 return TrimRightFunc(s[start:stop], unicode.IsSpace) 1062 } 1063 if asciiSpace[c] == 0 { 1064 break 1065 } 1066 } 1067 1068 // At this point s[start:stop] starts and ends with an ASCII 1069 // non-space bytes, so we're done. Non-ASCII cases have already 1070 // been handled above. 1071 return s[start:stop] 1072 } 1073 1074 // TrimPrefix returns s without the provided leading prefix string. 1075 // If s doesn't start with prefix, s is returned unchanged. 1076 func TrimPrefix(s, prefix string) string { 1077 if HasPrefix(s, prefix) { 1078 return s[len(prefix):] 1079 } 1080 return s 1081 } 1082 1083 // TrimSuffix returns s without the provided trailing suffix string. 1084 // If s doesn't end with suffix, s is returned unchanged. 1085 func TrimSuffix(s, suffix string) string { 1086 if HasSuffix(s, suffix) { 1087 return s[:len(s)-len(suffix)] 1088 } 1089 return s 1090 } 1091 1092 // Replace returns a copy of the string s with the first n 1093 // non-overlapping instances of old replaced by new. 1094 // If old is empty, it matches at the beginning of the string 1095 // and after each UTF-8 sequence, yielding up to k+1 replacements 1096 // for a k-rune string. 1097 // If n < 0, there is no limit on the number of replacements. 
func Replace(s, old, new string, n int) string {
	if old == new || n == 0 {
		return s // avoid allocation
	}

	// Compute number of replacements.
	if m := Count(s, old); m == 0 {
		return s // avoid allocation
	} else if n < 0 || m < n {
		n = m
	}

	// Apply replacements to buffer.
	var b Builder
	// Exact size of the result: every replacement swaps len(old) bytes
	// for len(new) bytes.
	b.Grow(len(s) + n*(len(new)-len(old)))
	// start is the offset in s of the first byte not yet copied.
	start := 0
	for i := 0; i < n; i++ {
		// j becomes the offset at which this replacement is inserted.
		j := start
		if len(old) == 0 {
			// An empty old matches before the first rune and then
			// after each following UTF-8 sequence.
			if i > 0 {
				_, wid := utf8.DecodeRuneInString(s[start:])
				j += wid
			}
		} else {
			j += Index(s[start:], old)
		}
		b.WriteString(s[start:j])
		b.WriteString(new)
		start = j + len(old)
	}
	b.WriteString(s[start:])
	return b.String()
}

// ReplaceAll returns a copy of the string s with all
// non-overlapping instances of old replaced by new.
// If old is empty, it matches at the beginning of the string
// and after each UTF-8 sequence, yielding up to k+1 replacements
// for a k-rune string.
func ReplaceAll(s, old, new string) string {
	return Replace(s, old, new, -1)
}

// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under simple Unicode case-folding, which is a more general
// form of case-insensitivity.
func EqualFold(s, t string) bool {
	// ASCII fast path
	// Compare byte by byte until either string ends or a non-ASCII byte
	// shows up in one of them.
	i := 0
	for ; i < len(s) && i < len(t); i++ {
		sr := s[i]
		tr := t[i]
		if sr|tr >= utf8.RuneSelf {
			goto hasUnicode
		}

		// Easy case.
		if tr == sr {
			continue
		}

		// Make sr < tr to simplify what follows.
		if tr < sr {
			tr, sr = sr, tr
		}
		// ASCII only, sr/tr must be upper/lower case
		if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
			continue
		}
		return false
	}
	// Check if we've exhausted both strings.
	return len(s) == len(t)

hasUnicode:
	// Continue rune by rune from the first non-ASCII position.
	s = s[i:]
	t = t[i:]
	for _, sr := range s {
		// If t is exhausted the strings are not equal.
		if len(t) == 0 {
			return false
		}

		// Extract first rune from second string.
		var tr rune
		if t[0] < utf8.RuneSelf {
			tr, t = rune(t[0]), t[1:]
		} else {
			r, size := utf8.DecodeRuneInString(t)
			tr, t = r, t[size:]
		}

		// If they match, keep going; if not, return false.

		// Easy case.
		if tr == sr {
			continue
		}

		// Make sr < tr to simplify what follows.
		if tr < sr {
			tr, sr = sr, tr
		}
		// Fast check for ASCII.
		if tr < utf8.RuneSelf {
			// ASCII only, sr/tr must be upper/lower case
			if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
				continue
			}
			return false
		}

		// General case. SimpleFold(x) returns the next equivalent rune > x
		// or wraps around to smaller values.
		// Walk sr's fold orbit upwards; stop on reaching tr, passing it,
		// or wrapping back to sr.
		r := unicode.SimpleFold(sr)
		for r != sr && r < tr {
			r = unicode.SimpleFold(r)
		}
		if r == tr {
			continue
		}
		return false
	}

	// First string is empty, so check if the second one is also empty.
	return len(t) == 0
}

// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
1227 func Index(s, substr string) int { 1228 n := len(substr) 1229 switch { 1230 case n == 0: 1231 return 0 1232 case n == 1: 1233 return IndexByte(s, substr[0]) 1234 case n == len(s): 1235 if substr == s { 1236 return 0 1237 } 1238 return -1 1239 case n > len(s): 1240 return -1 1241 case n <= bytealg.MaxLen: 1242 // Use brute force when s and substr both are small 1243 if len(s) <= bytealg.MaxBruteForce { 1244 return bytealg.IndexString(s, substr) 1245 } 1246 c0 := substr[0] 1247 c1 := substr[1] 1248 i := 0 1249 t := len(s) - n + 1 1250 fails := 0 1251 for i < t { 1252 if s[i] != c0 { 1253 // IndexByte is faster than bytealg.IndexString, so use it as long as 1254 // we're not getting lots of false positives. 1255 o := IndexByte(s[i+1:t], c0) 1256 if o < 0 { 1257 return -1 1258 } 1259 i += o + 1 1260 } 1261 if s[i+1] == c1 && s[i:i+n] == substr { 1262 return i 1263 } 1264 fails++ 1265 i++ 1266 // Switch to bytealg.IndexString when IndexByte produces too many false positives. 1267 if fails > bytealg.Cutover(i) { 1268 r := bytealg.IndexString(s[i:], substr) 1269 if r >= 0 { 1270 return r + i 1271 } 1272 return -1 1273 } 1274 } 1275 return -1 1276 } 1277 c0 := substr[0] 1278 c1 := substr[1] 1279 i := 0 1280 t := len(s) - n + 1 1281 fails := 0 1282 for i < t { 1283 if s[i] != c0 { 1284 o := IndexByte(s[i+1:t], c0) 1285 if o < 0 { 1286 return -1 1287 } 1288 i += o + 1 1289 } 1290 if s[i+1] == c1 && s[i:i+n] == substr { 1291 return i 1292 } 1293 i++ 1294 fails++ 1295 if fails >= 4+i>>4 && i < t { 1296 // See comment in ../bytes/bytes.go. 1297 j := bytealg.IndexRabinKarp(s[i:], substr) 1298 if j < 0 { 1299 return -1 1300 } 1301 return i + j 1302 } 1303 } 1304 return -1 1305 } 1306 1307 // Cut slices s around the first instance of sep, 1308 // returning the text before and after sep. 1309 // The found result reports whether sep appears in s. 1310 // If sep does not appear in s, cut returns s, "", false. 
1311 func Cut(s, sep string) (before, after string, found bool) { 1312 if i := Index(s, sep); i >= 0 { 1313 return s[:i], s[i+len(sep):], true 1314 } 1315 return s, "", false 1316 } 1317 1318 // CutPrefix returns s without the provided leading prefix string 1319 // and reports whether it found the prefix. 1320 // If s doesn't start with prefix, CutPrefix returns s, false. 1321 // If prefix is the empty string, CutPrefix returns s, true. 1322 func CutPrefix(s, prefix string) (after string, found bool) { 1323 if !HasPrefix(s, prefix) { 1324 return s, false 1325 } 1326 return s[len(prefix):], true 1327 } 1328 1329 // CutSuffix returns s without the provided ending suffix string 1330 // and reports whether it found the suffix. 1331 // If s doesn't end with suffix, CutSuffix returns s, false. 1332 // If suffix is the empty string, CutSuffix returns s, true. 1333 func CutSuffix(s, suffix string) (before string, found bool) { 1334 if !HasSuffix(s, suffix) { 1335 return s, false 1336 } 1337 return s[:len(s)-len(suffix)], true 1338 }