github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/gnovm/stdlibs/strings/strings.gno (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package strings implements simple functions to manipulate UTF-8 encoded strings. 6 // 7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8 package strings 9 10 import ( 11 "unicode" 12 "unicode/utf8" 13 14 "internal/bytealg" 15 ) 16 17 // explode splits s into a slice of UTF-8 strings, 18 // one string per Unicode character up to a maximum of n (n < 0 means no limit). 19 // Invalid UTF-8 sequences become correct encodings of U+FFFD. 20 func explode(s string, n int) []string { 21 l := utf8.RuneCountInString(s) 22 if n < 0 || n > l { 23 n = l 24 } 25 a := make([]string, n) 26 for i := 0; i < n-1; i++ { 27 ch, size := utf8.DecodeRuneInString(s) 28 a[i] = s[:size] 29 s = s[size:] 30 if ch == utf8.RuneError { 31 a[i] = string(utf8.RuneError) 32 } 33 } 34 if n > 0 { 35 a[n-1] = s 36 } 37 return a 38 } 39 40 // Count counts the number of non-overlapping instances of substr in s. 41 // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 42 func Count(s, substr string) int { 43 // special case 44 if len(substr) == 0 { 45 return utf8.RuneCountInString(s) + 1 46 } 47 if len(substr) == 1 { 48 return bytealg.CountString(s, substr[0]) 49 } 50 n := 0 51 for { 52 i := Index(s, substr) 53 if i == -1 { 54 return n 55 } 56 n++ 57 s = s[i+len(substr):] 58 } 59 } 60 61 // Contains reports whether substr is within s. 62 func Contains(s, substr string) bool { 63 return Index(s, substr) >= 0 64 } 65 66 // ContainsAny reports whether any Unicode code points in chars are within s. 67 func ContainsAny(s, chars string) bool { 68 return IndexAny(s, chars) >= 0 69 } 70 71 // ContainsRune reports whether the Unicode code point r is within s. 72 func ContainsRune(s string, r rune) bool { 73 return IndexRune(s, r) >= 0 74 } 75 76 // LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. 77 func LastIndex(s, substr string) int { 78 n := len(substr) 79 switch { 80 case n == 0: 81 return len(s) 82 case n == 1: 83 return LastIndexByte(s, substr[0]) 84 case n == len(s): 85 if substr == s { 86 return 0 87 } 88 return -1 89 case n > len(s): 90 return -1 91 } 92 // Rabin-Karp search from the end of the string 93 hashss, pow := bytealg.HashStrRev(substr) 94 last := len(s) - n 95 var h uint32 96 for i := len(s) - 1; i >= last; i-- { 97 h = h*bytealg.PrimeRK + uint32(s[i]) 98 } 99 if h == hashss && s[last:] == substr { 100 return last 101 } 102 for i := last - 1; i >= 0; i-- { 103 h *= bytealg.PrimeRK 104 h += uint32(s[i]) 105 h -= pow * uint32(s[i+n]) 106 if h == hashss && s[i:i+n] == substr { 107 return i 108 } 109 } 110 return -1 111 } 112 113 // IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s. 114 func IndexByte(s string, c byte) int { 115 return bytealg.IndexByteString(s, c) 116 } 117 118 // IndexRune returns the index of the first instance of the Unicode code point 119 // r, or -1 if rune is not present in s. 120 // If r is utf8.RuneError, it returns the first instance of any 121 // invalid UTF-8 byte sequence. 122 func IndexRune(s string, r rune) int { 123 switch { 124 case 0 <= r && r < utf8.RuneSelf: 125 return IndexByte(s, byte(r)) 126 case r == utf8.RuneError: 127 for i, r := range s { 128 if r == utf8.RuneError { 129 return i 130 } 131 } 132 return -1 133 case !utf8.ValidRune(r): 134 return -1 135 default: 136 return Index(s, string(r)) 137 } 138 } 139 140 // IndexAny returns the index of the first instance of any Unicode code point 141 // from chars in s, or -1 if no Unicode code point from chars is present in s. 142 func IndexAny(s, chars string) int { 143 if chars == "" { 144 // Avoid scanning all of s. 145 return -1 146 } 147 if len(chars) == 1 { 148 // Avoid scanning all of s. 149 r := rune(chars[0]) 150 if r >= utf8.RuneSelf { 151 r = utf8.RuneError 152 } 153 return IndexRune(s, r) 154 } 155 if len(s) > 8 { 156 if as, isASCII := makeASCIISet(chars); isASCII { 157 for i := 0; i < len(s); i++ { 158 if as.contains(s[i]) { 159 return i 160 } 161 } 162 return -1 163 } 164 } 165 for i, c := range s { 166 if IndexRune(chars, c) >= 0 { 167 return i 168 } 169 } 170 return -1 171 } 172 173 // LastIndexAny returns the index of the last instance of any Unicode code 174 // point from chars in s, or -1 if no Unicode code point from chars is 175 // present in s. 176 func LastIndexAny(s, chars string) int { 177 if chars == "" { 178 // Avoid scanning all of s. 179 return -1 180 } 181 if len(s) == 1 { 182 rc := rune(s[0]) 183 if rc >= utf8.RuneSelf { 184 rc = utf8.RuneError 185 } 186 if IndexRune(chars, rc) >= 0 { 187 return 0 188 } 189 return -1 190 } 191 if len(s) > 8 { 192 if as, isASCII := makeASCIISet(chars); isASCII { 193 for i := len(s) - 1; i >= 0; i-- { 194 if as.contains(s[i]) { 195 return i 196 } 197 } 198 return -1 199 } 200 } 201 if len(chars) == 1 { 202 rc := rune(chars[0]) 203 if rc >= utf8.RuneSelf { 204 rc = utf8.RuneError 205 } 206 for i := len(s); i > 0; { 207 r, size := utf8.DecodeLastRuneInString(s[:i]) 208 i -= size 209 if rc == r { 210 return i 211 } 212 } 213 return -1 214 } 215 for i := len(s); i > 0; { 216 r, size := utf8.DecodeLastRuneInString(s[:i]) 217 i -= size 218 if IndexRune(chars, r) >= 0 { 219 return i 220 } 221 } 222 return -1 223 } 224 225 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 226 func LastIndexByte(s string, c byte) int { 227 for i := len(s) - 1; i >= 0; i-- { 228 if s[i] == c { 229 return i 230 } 231 } 232 return -1 233 } 234 235 // Generic split: splits after each instance of sep, 236 // including sepSave bytes of sep in the subarrays. 237 func genSplit(s, sep string, sepSave, n int) []string { 238 if n == 0 { 239 return nil 240 } 241 if sep == "" { 242 return explode(s, n) 243 } 244 if n < 0 { 245 n = Count(s, sep) + 1 246 } 247 248 a := make([]string, n) 249 n-- 250 i := 0 251 for i < n { 252 m := Index(s, sep) 253 if m < 0 { 254 break 255 } 256 a[i] = s[:m+sepSave] 257 s = s[m+len(sep):] 258 i++ 259 } 260 a[i] = s 261 return a[:i+1] 262 } 263 264 // SplitN slices s into substrings separated by sep and returns a slice of 265 // the substrings between those separators. 266 // 267 // The count determines the number of substrings to return: 268 // 269 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 270 // n == 0: the result is nil (zero substrings) 271 // n < 0: all substrings 272 // 273 // Edge cases for s and sep (for example, empty strings) are handled 274 // as described in the documentation for Split. 275 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 276 277 // SplitAfterN slices s into substrings after each instance of sep and 278 // returns a slice of those substrings. 279 // 280 // The count determines the number of substrings to return: 281 // 282 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 283 // n == 0: the result is nil (zero substrings) 284 // n < 0: all substrings 285 // 286 // Edge cases for s and sep (for example, empty strings) are handled 287 // as described in the documentation for SplitAfter. 288 func SplitAfterN(s, sep string, n int) []string { 289 return genSplit(s, sep, len(sep), n) 290 } 291 292 // Split slices s into all substrings separated by sep and returns a slice of 293 // the substrings between those separators. 294 // 295 // If s does not contain sep and sep is not empty, Split returns a 296 // slice of length 1 whose only element is s. 297 // 298 // If sep is empty, Split splits after each UTF-8 sequence. If both s 299 // and sep are empty, Split returns an empty slice. 300 // 301 // It is equivalent to SplitN with a count of -1. 302 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 303 304 // SplitAfter slices s into all substrings after each instance of sep and 305 // returns a slice of those substrings. 306 // 307 // If s does not contain sep and sep is not empty, SplitAfter returns 308 // a slice of length 1 whose only element is s. 309 // 310 // If sep is empty, SplitAfter splits after each UTF-8 sequence. If 311 // both s and sep are empty, SplitAfter returns an empty slice. 312 // 313 // It is equivalent to SplitAfterN with a count of -1. 314 func SplitAfter(s, sep string) []string { 315 return genSplit(s, sep, len(sep), -1) 316 } 317 318 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 319 320 // Fields splits the string s around each instance of one or more consecutive white space 321 // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an 322 // empty slice if s contains only white space. 323 func Fields(s string) []string { 324 // First count the fields. 325 // This is an exact count if s is ASCII, otherwise it is an approximation. 326 n := 0 327 wasSpace := 1 328 // setBits is used to track which bits are set in the bytes of s. 329 setBits := uint8(0) 330 for i := 0; i < len(s); i++ { 331 r := s[i] 332 setBits |= r 333 isSpace := int(asciiSpace[r]) 334 n += wasSpace & ^isSpace 335 wasSpace = isSpace 336 } 337 338 if setBits >= utf8.RuneSelf { 339 // Some runes in the input string are not ASCII. 340 return FieldsFunc(s, unicode.IsSpace) 341 } 342 // ASCII fast path 343 a := make([]string, n) 344 na := 0 345 fieldStart := 0 346 i := 0 347 // Skip spaces in the front of the input. 348 for i < len(s) && asciiSpace[s[i]] != 0 { 349 i++ 350 } 351 fieldStart = i 352 for i < len(s) { 353 if asciiSpace[s[i]] == 0 { 354 i++ 355 continue 356 } 357 a[na] = s[fieldStart:i] 358 na++ 359 i++ 360 // Skip spaces in between fields. 361 for i < len(s) && asciiSpace[s[i]] != 0 { 362 i++ 363 } 364 fieldStart = i 365 } 366 if fieldStart < len(s) { // Last field might end at EOF. 367 a[na] = s[fieldStart:] 368 } 369 return a 370 } 371 372 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 373 // and returns an array of slices of s. If all code points in s satisfy f(c) or the 374 // string is empty, an empty slice is returned. 375 // 376 // FieldsFunc makes no guarantees about the order in which it calls f(c) 377 // and assumes that f always returns the same value for a given c. 378 func FieldsFunc(s string, f func(rune) bool) []string { 379 // A span is used to record a slice of s of the form s[start:end]. 380 // The start index is inclusive and the end index is exclusive. 381 type span struct { 382 start int 383 end int 384 } 385 spans := make([]span, 0, 32) 386 387 // Find the field start and end indices. 388 // Doing this in a separate pass (rather than slicing the string s 389 // and collecting the result substrings right away) is significantly 390 // more efficient, possibly due to cache effects. 391 start := -1 // valid span start if >= 0 392 for end, r := range s { // XXX rename rune to r. 393 if f(r) { 394 if start >= 0 { 395 spans = append(spans, span{start, end}) 396 // Set start to a negative value. 397 // Note: using -1 here consistently and reproducibly 398 // slows down this code by a several percent on amd64. 399 start = ^start 400 } 401 } else { 402 if start < 0 { 403 start = end 404 } 405 } 406 } 407 408 // Last field might end at EOF. 409 if start >= 0 { 410 spans = append(spans, span{start, len(s)}) 411 } 412 413 // Create strings from recorded field indices. 414 a := make([]string, len(spans)) 415 for i, span := range spans { 416 a[i] = s[span.start:span.end] 417 } 418 419 return a 420 } 421 422 // Join concatenates the elements of its first argument to create a single string. The separator 423 // string sep is placed between elements in the resulting string. 424 func Join(elems []string, sep string) string { 425 switch len(elems) { 426 case 0: 427 return "" 428 case 1: 429 return elems[0] 430 } 431 n := len(sep) * (len(elems) - 1) 432 for i := 0; i < len(elems); i++ { 433 n += len(elems[i]) 434 } 435 436 var b Builder 437 b.Grow(n) 438 b.WriteString(elems[0]) 439 for _, s := range elems[1:] { 440 b.WriteString(sep) 441 b.WriteString(s) 442 } 443 return b.String() 444 } 445 446 // HasPrefix tests whether the string s begins with prefix. 447 func HasPrefix(s, prefix string) bool { 448 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 449 } 450 451 // HasSuffix tests whether the string s ends with suffix. 452 func HasSuffix(s, suffix string) bool { 453 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 454 } 455 456 // Map returns a copy of the string s with all its characters modified 457 // according to the mapping function. If mapping returns a negative value, the character is 458 // dropped from the string with no replacement. 459 func Map(mapping func(rune) rune, s string) string { 460 // In the worst case, the string can grow when mapped, making 461 // things unpleasant. But it's so rare we barge in assuming it's 462 // fine. It could also shrink but that falls out naturally. 463 464 // The output buffer b is initialized on demand, the first 465 // time a character differs. 466 var b Builder 467 468 for i, c := range s { 469 r := mapping(c) 470 if r == c && c != utf8.RuneError { 471 continue 472 } 473 474 var width int 475 if c == utf8.RuneError { 476 c, width = utf8.DecodeRuneInString(s[i:]) 477 if width != 1 && r == c { 478 continue 479 } 480 } else { 481 width = utf8.RuneLen(c) 482 } 483 484 b.Grow(len(s) + utf8.UTFMax) 485 b.WriteString(s[:i]) 486 if r >= 0 { 487 b.WriteRune(r) 488 } 489 490 s = s[i+width:] 491 break 492 } 493 494 // Fast path for unchanged input 495 if b.Cap() == 0 { // didn't call b.Grow above 496 return s 497 } 498 499 for _, c := range s { 500 r := mapping(c) 501 502 if r >= 0 { 503 // common case 504 // Due to inlining, it is more performant to determine if WriteByte should be 505 // invoked rather than always call WriteRune 506 if r < utf8.RuneSelf { 507 b.WriteByte(byte(r)) 508 } else { 509 // r is not a ASCII rune. 510 b.WriteRune(r) 511 } 512 } 513 } 514 515 return b.String() 516 } 517 518 // Repeat returns a new string consisting of count copies of the string s. 519 // 520 // It panics if count is negative or if 521 // the result of (len(s) * count) overflows. 522 func Repeat(s string, count int) string { 523 if count == 0 { 524 return "" 525 } 526 527 // Since we cannot return an error on overflow, 528 // we should panic if the repeat will generate 529 // an overflow. 530 // See Issue golang.org/issue/16237 531 if count < 0 { 532 panic("strings: negative Repeat count") 533 } else if len(s)*count/count != len(s) { 534 panic("strings: Repeat count causes overflow") 535 } 536 537 n := len(s) * count 538 var b Builder 539 b.Grow(n) 540 b.WriteString(s) 541 for b.Len() < n { 542 if b.Len() <= n/2 { 543 b.WriteString(b.String()) 544 } else { 545 b.WriteString(b.String()[:n-b.Len()]) 546 break 547 } 548 } 549 return b.String() 550 } 551 552 // ToUpper returns s with all Unicode letters mapped to their upper case. 553 func ToUpper(s string) string { 554 isASCII, hasLower := true, false 555 for i := 0; i < len(s); i++ { 556 c := s[i] 557 if c >= utf8.RuneSelf { 558 isASCII = false 559 break 560 } 561 hasLower = hasLower || ('a' <= c && c <= 'z') 562 } 563 564 if isASCII { // optimize for ASCII-only strings. 565 if !hasLower { 566 return s 567 } 568 var b Builder 569 b.Grow(len(s)) 570 for i := 0; i < len(s); i++ { 571 c := s[i] 572 if 'a' <= c && c <= 'z' { 573 c -= 'a' - 'A' 574 } 575 b.WriteByte(c) 576 } 577 return b.String() 578 } 579 return Map(unicode.ToUpper, s) 580 } 581 582 // ToLower returns s with all Unicode letters mapped to their lower case. 583 func ToLower(s string) string { 584 isASCII, hasUpper := true, false 585 for i := 0; i < len(s); i++ { 586 c := s[i] 587 if c >= utf8.RuneSelf { 588 isASCII = false 589 break 590 } 591 hasUpper = hasUpper || ('A' <= c && c <= 'Z') 592 } 593 594 if isASCII { // optimize for ASCII-only strings. 595 if !hasUpper { 596 return s 597 } 598 var b Builder 599 b.Grow(len(s)) 600 for i := 0; i < len(s); i++ { 601 c := s[i] 602 if 'A' <= c && c <= 'Z' { 603 c += 'a' - 'A' 604 } 605 b.WriteByte(c) 606 } 607 return b.String() 608 } 609 return Map(unicode.ToLower, s) 610 } 611 612 // ToTitle returns a copy of the string s with all Unicode letters mapped to 613 // their Unicode title case. 614 func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 615 616 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 617 // upper case using the case mapping specified by c. 618 func ToUpperSpecial(c unicode.SpecialCase, s string) string { 619 return Map(c.ToUpper, s) 620 } 621 622 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 623 // lower case using the case mapping specified by c. 624 func ToLowerSpecial(c unicode.SpecialCase, s string) string { 625 return Map(c.ToLower, s) 626 } 627 628 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 629 // Unicode title case, giving priority to the special casing rules. 630 func ToTitleSpecial(c unicode.SpecialCase, s string) string { 631 return Map(c.ToTitle, s) 632 } 633 634 // ToValidUTF8 returns a copy of the string s with each run of invalid UTF-8 byte sequences 635 // replaced by the replacement string, which may be empty. 636 func ToValidUTF8(s, replacement string) string { 637 var b Builder 638 639 for i, c := range s { 640 if c != utf8.RuneError { 641 continue 642 } 643 644 _, wid := utf8.DecodeRuneInString(s[i:]) 645 if wid == 1 { 646 b.Grow(len(s) + len(replacement)) 647 b.WriteString(s[:i]) 648 s = s[i:] 649 break 650 } 651 } 652 653 // Fast path for unchanged input 654 if b.Cap() == 0 { // didn't call b.Grow above 655 return s 656 } 657 658 invalid := false // previous byte was from an invalid UTF-8 sequence 659 for i := 0; i < len(s); { 660 c := s[i] 661 if c < utf8.RuneSelf { 662 i++ 663 invalid = false 664 b.WriteByte(c) 665 continue 666 } 667 _, wid := utf8.DecodeRuneInString(s[i:]) 668 if wid == 1 { 669 i++ 670 if !invalid { 671 invalid = true 672 b.WriteString(replacement) 673 } 674 continue 675 } 676 invalid = false 677 b.WriteString(s[i : i+wid]) 678 i += wid 679 } 680 681 return b.String() 682 } 683 684 // isSeparator reports whether the rune could mark a word boundary. 685 // TODO: update when package unicode captures more of the properties. 686 func isSeparator(r rune) bool { 687 // ASCII alphanumerics and underscore are not separators 688 if r <= 0x7F { 689 switch { 690 case '0' <= r && r <= '9': 691 return false 692 case 'a' <= r && r <= 'z': 693 return false 694 case 'A' <= r && r <= 'Z': 695 return false 696 case r == '_': 697 return false 698 } 699 return true 700 } 701 // Letters and digits are not separators 702 if unicode.IsLetter(r) || unicode.IsDigit(r) { 703 return false 704 } 705 // Otherwise, all we can do for now is treat spaces as separators. 706 return unicode.IsSpace(r) 707 } 708 709 // Title returns a copy of the string s with all Unicode letters that begin words 710 // mapped to their Unicode title case. 711 // 712 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 713 func Title(s string) string { 714 // Use a closure here to remember state. 715 // Hackish but effective. Depends on Map scanning in order and calling 716 // the closure once per rune. 717 prev := ' ' 718 return Map( 719 func(r rune) rune { 720 if isSeparator(prev) { 721 prev = r 722 return unicode.ToTitle(r) 723 } 724 prev = r 725 return r 726 }, 727 s) 728 } 729 730 // TrimLeftFunc returns a slice of the string s with all leading 731 // Unicode code points c satisfying f(c) removed. 732 func TrimLeftFunc(s string, f func(rune) bool) string { 733 i := indexFunc(s, f, false) 734 if i == -1 { 735 return "" 736 } 737 return s[i:] 738 } 739 740 // TrimRightFunc returns a slice of the string s with all trailing 741 // Unicode code points c satisfying f(c) removed. 742 func TrimRightFunc(s string, f func(rune) bool) string { 743 i := lastIndexFunc(s, f, false) 744 if i >= 0 && s[i] >= utf8.RuneSelf { 745 _, wid := utf8.DecodeRuneInString(s[i:]) 746 i += wid 747 } else { 748 i++ 749 } 750 return s[0:i] 751 } 752 753 // TrimFunc returns a slice of the string s with all leading 754 // and trailing Unicode code points c satisfying f(c) removed. 755 func TrimFunc(s string, f func(rune) bool) string { 756 return TrimRightFunc(TrimLeftFunc(s, f), f) 757 } 758 759 // IndexFunc returns the index into s of the first Unicode 760 // code point satisfying f(c), or -1 if none do. 761 func IndexFunc(s string, f func(rune) bool) int { 762 return indexFunc(s, f, true) 763 } 764 765 // LastIndexFunc returns the index into s of the last 766 // Unicode code point satisfying f(c), or -1 if none do. 767 func LastIndexFunc(s string, f func(rune) bool) int { 768 return lastIndexFunc(s, f, true) 769 } 770 771 // indexFunc is the same as IndexFunc except that if 772 // truth==false, the sense of the predicate function is 773 // inverted. 774 func indexFunc(s string, f func(rune) bool, truth bool) int { 775 for i, r := range s { 776 if f(r) == truth { 777 return i 778 } 779 } 780 return -1 781 } 782 783 // lastIndexFunc is the same as LastIndexFunc except that if 784 // truth==false, the sense of the predicate function is 785 // inverted. 786 func lastIndexFunc(s string, f func(rune) bool, truth bool) int { 787 for i := len(s); i > 0; { 788 r, size := utf8.DecodeLastRuneInString(s[0:i]) 789 i -= size 790 if f(r) == truth { 791 return i 792 } 793 } 794 return -1 795 } 796 797 // asciiSet is a 32-byte value, where each bit represents the presence of a 798 // given ASCII character in the set. The 128-bits of the lower 16 bytes, 799 // starting with the least-significant bit of the lowest word to the 800 // most-significant bit of the highest word, map to the full range of all 801 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, 802 // ensuring that any non-ASCII character will be reported as not in the set. 803 type asciiSet [8]uint32 804 805 // makeASCIISet creates a set of ASCII characters and reports whether all 806 // characters in chars are ASCII. 807 func makeASCIISet(chars string) (as asciiSet, ok bool) { 808 for i := 0; i < len(chars); i++ { 809 c := chars[i] 810 if c >= utf8.RuneSelf { 811 return as, false 812 } 813 as[c>>5] |= 1 << uint(c&31) 814 } 815 return as, true 816 } 817 818 // contains reports whether c is inside the set. 819 func (as *asciiSet) contains(c byte) bool { 820 return (as[c>>5] & (1 << uint(c&31))) != 0 821 } 822 823 func makeCutsetFunc(cutset string) func(rune) bool { 824 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 825 return func(r rune) bool { 826 return r == rune(cutset[0]) 827 } 828 } 829 if as, isASCII := makeASCIISet(cutset); isASCII { 830 return func(r rune) bool { 831 return r < utf8.RuneSelf && as.contains(byte(r)) 832 } 833 } 834 return func(r rune) bool { return IndexRune(cutset, r) >= 0 } 835 } 836 837 // Trim returns a slice of the string s with all leading and 838 // trailing Unicode code points contained in cutset removed. 839 func Trim(s, cutset string) string { 840 if s == "" || cutset == "" { 841 return s 842 } 843 return TrimFunc(s, makeCutsetFunc(cutset)) 844 } 845 846 // TrimLeft returns a slice of the string s with all leading 847 // Unicode code points contained in cutset removed. 848 // 849 // To remove a prefix, use TrimPrefix instead. 850 func TrimLeft(s, cutset string) string { 851 if s == "" || cutset == "" { 852 return s 853 } 854 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 855 } 856 857 // TrimRight returns a slice of the string s, with all trailing 858 // Unicode code points contained in cutset removed. 859 // 860 // To remove a suffix, use TrimSuffix instead. 861 func TrimRight(s, cutset string) string { 862 if s == "" || cutset == "" { 863 return s 864 } 865 return TrimRightFunc(s, makeCutsetFunc(cutset)) 866 } 867 868 // TrimSpace returns a slice of the string s, with all leading 869 // and trailing white space removed, as defined by Unicode. 870 func TrimSpace(s string) string { 871 // Fast path for ASCII: look for the first ASCII non-space byte 872 start := 0 873 for ; start < len(s); start++ { 874 c := s[start] 875 if c >= utf8.RuneSelf { 876 // If we run into a non-ASCII byte, fall back to the 877 // slower unicode-aware method on the remaining bytes 878 return TrimFunc(s[start:], unicode.IsSpace) 879 } 880 if asciiSpace[c] == 0 { 881 break 882 } 883 } 884 885 // Now look for the first ASCII non-space byte from the end 886 stop := len(s) 887 for ; stop > start; stop-- { 888 c := s[stop-1] 889 if c >= utf8.RuneSelf { 890 return TrimFunc(s[start:stop], unicode.IsSpace) 891 } 892 if asciiSpace[c] == 0 { 893 break 894 } 895 } 896 897 // At this point s[start:stop] starts and ends with an ASCII 898 // non-space bytes, so we're done. Non-ASCII cases have already 899 // been handled above. 900 return s[start:stop] 901 } 902 903 // TrimPrefix returns s without the provided leading prefix string. 904 // If s doesn't start with prefix, s is returned unchanged. 905 func TrimPrefix(s, prefix string) string { 906 if HasPrefix(s, prefix) { 907 return s[len(prefix):] 908 } 909 return s 910 } 911 912 // TrimSuffix returns s without the provided trailing suffix string. 913 // If s doesn't end with suffix, s is returned unchanged. 914 func TrimSuffix(s, suffix string) string { 915 if HasSuffix(s, suffix) { 916 return s[:len(s)-len(suffix)] 917 } 918 return s 919 } 920 921 // Replace returns a copy of the string s with the first n 922 // non-overlapping instances of old replaced by new. 923 // If old is empty, it matches at the beginning of the string 924 // and after each UTF-8 sequence, yielding up to k+1 replacements 925 // for a k-rune string. 926 // If n < 0, there is no limit on the number of replacements. 927 // XXX rename new 928 func Replace(s, old, new_ string, n int) string { 929 if old == new_ || n == 0 { 930 return s // avoid allocation 931 } 932 933 // Compute number of replacements. 934 if m := Count(s, old); m == 0 { 935 return s // avoid allocation 936 } else if n < 0 || m < n { 937 n = m 938 } 939 940 // Apply replacements to buffer. 941 var b Builder 942 b.Grow(len(s) + n*(len(new_)-len(old))) 943 start := 0 944 for i := 0; i < n; i++ { 945 j := start 946 if len(old) == 0 { 947 if i > 0 { 948 _, wid := utf8.DecodeRuneInString(s[start:]) 949 j += wid 950 } 951 } else { 952 j += Index(s[start:], old) 953 } 954 b.WriteString(s[start:j]) 955 b.WriteString(new_) 956 start = j + len(old) 957 } 958 b.WriteString(s[start:]) 959 return b.String() 960 } 961 962 // ReplaceAll returns a copy of the string s with all 963 // non-overlapping instances of old replaced by new. 964 // If old is empty, it matches at the beginning of the string 965 // and after each UTF-8 sequence, yielding up to k+1 replacements 966 // for a k-rune string. 967 func ReplaceAll(s, old, new_ string) string { 968 return Replace(s, old, new_, -1) 969 } 970 971 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 972 // are equal under Unicode case-folding, which is a more general 973 // form of case-insensitivity. 974 func EqualFold(s, t string) bool { 975 for s != "" && t != "" { 976 // Extract first rune from each string. 977 var sr, tr rune 978 if s[0] < utf8.RuneSelf { 979 sr, s = rune(s[0]), s[1:] 980 } else { 981 r, size := utf8.DecodeRuneInString(s) 982 sr, s = r, s[size:] 983 } 984 if t[0] < utf8.RuneSelf { 985 tr, t = rune(t[0]), t[1:] 986 } else { 987 r, size := utf8.DecodeRuneInString(t) 988 tr, t = r, t[size:] 989 } 990 991 // If they match, keep going; if not, return false. 992 993 // Easy case. 994 if tr == sr { 995 continue 996 } 997 998 // Make sr < tr to simplify what follows. 999 if tr < sr { 1000 tr, sr = sr, tr 1001 } 1002 // Fast check for ASCII. 1003 if tr < utf8.RuneSelf { 1004 // ASCII only, sr/tr must be upper/lower case 1005 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 1006 continue 1007 } 1008 return false 1009 } 1010 1011 // General case. SimpleFold(x) returns the next equivalent rune > x 1012 // or wraps around to smaller values. 1013 r := unicode.SimpleFold(sr) 1014 for r != sr && r < tr { 1015 r = unicode.SimpleFold(r) 1016 } 1017 if r == tr { 1018 continue 1019 } 1020 return false 1021 } 1022 1023 // One string is empty. Are both? 1024 return s == t 1025 } 1026 1027 // Index returns the index of the first instance of substr in s, or -1 if substr is not present in s. 1028 func Index(s, substr string) int { 1029 n := len(substr) 1030 switch { 1031 case n == 0: 1032 return 0 1033 case n == 1: 1034 return IndexByte(s, substr[0]) 1035 case n == len(s): 1036 if substr == s { 1037 return 0 1038 } 1039 return -1 1040 case n > len(s): 1041 return -1 1042 case n <= bytealg.MaxLen: 1043 // Use brute force when s and substr both are small 1044 if len(s) <= bytealg.MaxBruteForce { 1045 return bytealg.IndexString(s, substr) 1046 } 1047 c0 := substr[0] 1048 c1 := substr[1] 1049 i := 0 1050 t := len(s) - n + 1 1051 fails := 0 1052 for i < t { 1053 if s[i] != c0 { 1054 // IndexByte is faster than bytealg.IndexString, so use it as long as 1055 // we're not getting lots of false positives. 1056 o := IndexByte(s[i+1:t], c0) 1057 if o < 0 { 1058 return -1 1059 } 1060 i += o + 1 1061 } 1062 if s[i+1] == c1 && s[i:i+n] == substr { 1063 return i 1064 } 1065 fails++ 1066 i++ 1067 // Switch to bytealg.IndexString when IndexByte produces too many false positives. 1068 if fails > bytealg.Cutover(i) { 1069 r := bytealg.IndexString(s[i:], substr) 1070 if r >= 0 { 1071 return r + i 1072 } 1073 return -1 1074 } 1075 } 1076 return -1 1077 } 1078 c0 := substr[0] 1079 c1 := substr[1] 1080 i := 0 1081 t := len(s) - n + 1 1082 fails := 0 1083 for i < t { 1084 if s[i] != c0 { 1085 o := IndexByte(s[i+1:t], c0) 1086 if o < 0 { 1087 return -1 1088 } 1089 i += o + 1 1090 } 1091 if s[i+1] == c1 && s[i:i+n] == substr { 1092 return i 1093 } 1094 i++ 1095 fails++ 1096 if fails >= 4+i>>4 && i < t { 1097 // See comment in ../bytes/bytes.go. 1098 j := bytealg.IndexRabinKarp(s[i:], substr) 1099 if j < 0 { 1100 return -1 1101 } 1102 return i + j 1103 } 1104 } 1105 return -1 1106 } 1107 1108 // Cut slices s around the first instance of sep, 1109 // returning the text before and after sep. 1110 // The found result reports whether sep appears in s. 1111 // If sep does not appear in s, cut returns s, "", false. 1112 func Cut(s, sep string) (before, after string, found bool) { 1113 if i := Index(s, sep); i >= 0 { 1114 return s[:i], s[i+len(sep):], true 1115 } 1116 return s, "", false 1117 } 1118 1119 // CutPrefix returns s without the provided leading prefix string 1120 // and reports whether it found the prefix. 1121 // If s doesn't start with prefix, CutPrefix returns s, false. 1122 // If prefix is the empty string, CutPrefix returns s, true. 1123 func CutPrefix(s, prefix string) (after string, found bool) { 1124 if !HasPrefix(s, prefix) { 1125 return s, false 1126 } 1127 return s[len(prefix):], true 1128 } 1129 1130 // CutSuffix returns s without the provided ending suffix string 1131 // and reports whether it found the suffix. 1132 // If s doesn't end with suffix, CutSuffix returns s, false. 1133 // If suffix is the empty string, CutSuffix returns s, true. 1134 func CutSuffix(s, suffix string) (before string, found bool) { 1135 if !HasSuffix(s, suffix) { 1136 return s, false 1137 } 1138 return s[:len(s)-len(suffix)], true 1139 }