github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/strings/strings.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package strings implements simple functions to manipulate UTF-8 encoded strings. 6 // 7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings. 8 package strings 9 10 import ( 11 "unicode" 12 "unicode/utf8" 13 ) 14 15 // explode splits s into a slice of UTF-8 strings, 16 // one string per Unicode character up to a maximum of n (n < 0 means no limit). 17 // Invalid UTF-8 sequences become correct encodings of U+FFFD. 18 func explode(s string, n int) []string { 19 l := utf8.RuneCountInString(s) 20 if n < 0 || n > l { 21 n = l 22 } 23 a := make([]string, n) 24 for i := 0; i < n-1; i++ { 25 ch, size := utf8.DecodeRuneInString(s) 26 a[i] = s[:size] 27 s = s[size:] 28 if ch == utf8.RuneError { 29 a[i] = string(utf8.RuneError) 30 } 31 } 32 if n > 0 { 33 a[n-1] = s 34 } 35 return a 36 } 37 38 // primeRK is the prime base used in Rabin-Karp algorithm. 39 const primeRK = 16777619 40 41 // hashStr returns the hash and the appropriate multiplicative 42 // factor for use in Rabin-Karp algorithm. 43 func hashStr(sep string) (uint32, uint32) { 44 hash := uint32(0) 45 for i := 0; i < len(sep); i++ { 46 hash = hash*primeRK + uint32(sep[i]) 47 } 48 var pow, sq uint32 = 1, primeRK 49 for i := len(sep); i > 0; i >>= 1 { 50 if i&1 != 0 { 51 pow *= sq 52 } 53 sq *= sq 54 } 55 return hash, pow 56 } 57 58 // hashStrRev returns the hash of the reverse of sep and the 59 // appropriate multiplicative factor for use in Rabin-Karp algorithm. 
60 func hashStrRev(sep string) (uint32, uint32) { 61 hash := uint32(0) 62 for i := len(sep) - 1; i >= 0; i-- { 63 hash = hash*primeRK + uint32(sep[i]) 64 } 65 var pow, sq uint32 = 1, primeRK 66 for i := len(sep); i > 0; i >>= 1 { 67 if i&1 != 0 { 68 pow *= sq 69 } 70 sq *= sq 71 } 72 return hash, pow 73 } 74 75 // countGeneric implements Count. 76 func countGeneric(s, substr string) int { 77 // special case 78 if len(substr) == 0 { 79 return utf8.RuneCountInString(s) + 1 80 } 81 n := 0 82 for { 83 i := Index(s, substr) 84 if i == -1 { 85 return n 86 } 87 n++ 88 s = s[i+len(substr):] 89 } 90 } 91 92 // Contains reports whether substr is within s. 93 func Contains(s, substr string) bool { 94 return Index(s, substr) >= 0 95 } 96 97 // ContainsAny reports whether any Unicode code points in chars are within s. 98 func ContainsAny(s, chars string) bool { 99 return IndexAny(s, chars) >= 0 100 } 101 102 // ContainsRune reports whether the Unicode code point r is within s. 103 func ContainsRune(s string, r rune) bool { 104 return IndexRune(s, r) >= 0 105 } 106 107 // LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s. 
108 func LastIndex(s, substr string) int { 109 n := len(substr) 110 switch { 111 case n == 0: 112 return len(s) 113 case n == 1: 114 return LastIndexByte(s, substr[0]) 115 case n == len(s): 116 if substr == s { 117 return 0 118 } 119 return -1 120 case n > len(s): 121 return -1 122 } 123 // Rabin-Karp search from the end of the string 124 hashss, pow := hashStrRev(substr) 125 last := len(s) - n 126 var h uint32 127 for i := len(s) - 1; i >= last; i-- { 128 h = h*primeRK + uint32(s[i]) 129 } 130 if h == hashss && s[last:] == substr { 131 return last 132 } 133 for i := last - 1; i >= 0; i-- { 134 h *= primeRK 135 h += uint32(s[i]) 136 h -= pow * uint32(s[i+n]) 137 if h == hashss && s[i:i+n] == substr { 138 return i 139 } 140 } 141 return -1 142 } 143 144 // IndexRune returns the index of the first instance of the Unicode code point 145 // r, or -1 if rune is not present in s. 146 // If r is utf8.RuneError, it returns the first instance of any 147 // invalid UTF-8 byte sequence. 148 func IndexRune(s string, r rune) int { 149 switch { 150 case 0 <= r && r < utf8.RuneSelf: 151 return IndexByte(s, byte(r)) 152 case r == utf8.RuneError: 153 for i, r := range s { 154 if r == utf8.RuneError { 155 return i 156 } 157 } 158 return -1 159 case !utf8.ValidRune(r): 160 return -1 161 default: 162 return Index(s, string(r)) 163 } 164 } 165 166 // IndexAny returns the index of the first instance of any Unicode code point 167 // from chars in s, or -1 if no Unicode code point from chars is present in s. 168 func IndexAny(s, chars string) int { 169 if chars == "" { 170 // Avoid scanning all of s. 
171 return -1 172 } 173 if len(s) > 8 { 174 if as, isASCII := makeASCIISet(chars); isASCII { 175 for i := 0; i < len(s); i++ { 176 if as.contains(s[i]) { 177 return i 178 } 179 } 180 return -1 181 } 182 } 183 for i, c := range s { 184 for _, m := range chars { 185 if c == m { 186 return i 187 } 188 } 189 } 190 return -1 191 } 192 193 // LastIndexAny returns the index of the last instance of any Unicode code 194 // point from chars in s, or -1 if no Unicode code point from chars is 195 // present in s. 196 func LastIndexAny(s, chars string) int { 197 if chars == "" { 198 // Avoid scanning all of s. 199 return -1 200 } 201 if len(s) > 8 { 202 if as, isASCII := makeASCIISet(chars); isASCII { 203 for i := len(s) - 1; i >= 0; i-- { 204 if as.contains(s[i]) { 205 return i 206 } 207 } 208 return -1 209 } 210 } 211 for i := len(s); i > 0; { 212 r, size := utf8.DecodeLastRuneInString(s[:i]) 213 i -= size 214 for _, c := range chars { 215 if r == c { 216 return i 217 } 218 } 219 } 220 return -1 221 } 222 223 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 224 func LastIndexByte(s string, c byte) int { 225 for i := len(s) - 1; i >= 0; i-- { 226 if s[i] == c { 227 return i 228 } 229 } 230 return -1 231 } 232 233 // Generic split: splits after each instance of sep, 234 // including sepSave bytes of sep in the subarrays. 235 func genSplit(s, sep string, sepSave, n int) []string { 236 if n == 0 { 237 return nil 238 } 239 if sep == "" { 240 return explode(s, n) 241 } 242 if n < 0 { 243 n = Count(s, sep) + 1 244 } 245 246 a := make([]string, n) 247 n-- 248 i := 0 249 for i < n { 250 m := Index(s, sep) 251 if m < 0 { 252 break 253 } 254 a[i] = s[:m+sepSave] 255 s = s[m+len(sep):] 256 i++ 257 } 258 a[i] = s 259 return a[:i+1] 260 } 261 262 // SplitN slices s into substrings separated by sep and returns a slice of 263 // the substrings between those separators. 
264 // 265 // The count determines the number of substrings to return: 266 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 267 // n == 0: the result is nil (zero substrings) 268 // n < 0: all substrings 269 // 270 // Edge cases for s and sep (for example, empty strings) are handled 271 // as described in the documentation for Split. 272 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 273 274 // SplitAfterN slices s into substrings after each instance of sep and 275 // returns a slice of those substrings. 276 // 277 // The count determines the number of substrings to return: 278 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 279 // n == 0: the result is nil (zero substrings) 280 // n < 0: all substrings 281 // 282 // Edge cases for s and sep (for example, empty strings) are handled 283 // as described in the documentation for SplitAfter. 284 func SplitAfterN(s, sep string, n int) []string { 285 return genSplit(s, sep, len(sep), n) 286 } 287 288 // Split slices s into all substrings separated by sep and returns a slice of 289 // the substrings between those separators. 290 // 291 // If s does not contain sep and sep is not empty, Split returns a 292 // slice of length 1 whose only element is s. 293 // 294 // If sep is empty, Split splits after each UTF-8 sequence. If both s 295 // and sep are empty, Split returns an empty slice. 296 // 297 // It is equivalent to SplitN with a count of -1. 298 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 299 300 // SplitAfter slices s into all substrings after each instance of sep and 301 // returns a slice of those substrings. 302 // 303 // If s does not contain sep and sep is not empty, SplitAfter returns 304 // a slice of length 1 whose only element is s. 305 // 306 // If sep is empty, SplitAfter splits after each UTF-8 sequence. If 307 // both s and sep are empty, SplitAfter returns an empty slice. 
308 // 309 // It is equivalent to SplitAfterN with a count of -1. 310 func SplitAfter(s, sep string) []string { 311 return genSplit(s, sep, len(sep), -1) 312 } 313 314 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 315 316 // Fields splits the string s around each instance of one or more consecutive white space 317 // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an 318 // empty slice if s contains only white space. 319 func Fields(s string) []string { 320 // First count the fields. 321 // This is an exact count if s is ASCII, otherwise it is an approximation. 322 n := 0 323 wasSpace := 1 324 // setBits is used to track which bits are set in the bytes of s. 325 setBits := uint8(0) 326 for i := 0; i < len(s); i++ { 327 r := s[i] 328 setBits |= r 329 isSpace := int(asciiSpace[r]) 330 n += wasSpace & ^isSpace 331 wasSpace = isSpace 332 } 333 334 if setBits < utf8.RuneSelf { // ASCII fast path 335 a := make([]string, n) 336 na := 0 337 fieldStart := 0 338 i := 0 339 // Skip spaces in the front of the input. 340 for i < len(s) && asciiSpace[s[i]] != 0 { 341 i++ 342 } 343 fieldStart = i 344 for i < len(s) { 345 if asciiSpace[s[i]] == 0 { 346 i++ 347 continue 348 } 349 a[na] = s[fieldStart:i] 350 na++ 351 i++ 352 // Skip spaces in between fields. 353 for i < len(s) && asciiSpace[s[i]] != 0 { 354 i++ 355 } 356 fieldStart = i 357 } 358 if fieldStart < len(s) { // Last field might end at EOF. 359 a[na] = s[fieldStart:] 360 } 361 return a 362 } 363 364 // Some runes in the input string are not ASCII. 365 return FieldsFunc(s, unicode.IsSpace) 366 } 367 368 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 369 // and returns an array of slices of s. If all code points in s satisfy f(c) or the 370 // string is empty, an empty slice is returned. 371 // FieldsFunc makes no guarantees about the order in which it calls f(c). 
// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s string, f func(rune) bool) []string {
	// A span is used to record a slice of s of the form s[start:end].
	// The start index is inclusive and the end index is exclusive.
	type span struct {
		start int
		end   int
	}
	spans := make([]span, 0, 32)

	// Find the field start and end indices.
	wasField := false
	fromIndex := 0
	for i, r := range s {
		if f(r) {
			// A separator closes the field that was open, if any.
			if wasField {
				spans = append(spans, span{start: fromIndex, end: i})
				wasField = false
			}
		} else {
			// The first non-separator after a separator opens a new field.
			if !wasField {
				fromIndex = i
				wasField = true
			}
		}
	}

	// Last field might end at EOF.
	if wasField {
		spans = append(spans, span{fromIndex, len(s)})
	}

	// Create strings from recorded field indices.
	a := make([]string, len(spans))
	for i, sp := range spans {
		a[i] = s[sp.start:sp.end]
	}

	return a
}

// Join concatenates the elements of a to create a single string. The separator string
// sep is placed between elements in the resulting string.
func Join(a []string, sep string) string {
	switch len(a) {
	case 0:
		return ""
	case 1:
		return a[0]
	case 2:
		// Special case for common small values.
		// Remove if golang.org/issue/6714 is fixed
		return a[0] + sep + a[1]
	case 3:
		// Special case for common small values.
		// Remove if golang.org/issue/6714 is fixed
		return a[0] + sep + a[1] + sep + a[2]
	}
	// Compute the exact output size so the buffer is allocated once.
	n := len(sep) * (len(a) - 1)
	for i := 0; i < len(a); i++ {
		n += len(a[i])
	}

	b := make([]byte, n)
	bp := copy(b, a[0])
	for _, s := range a[1:] {
		bp += copy(b[bp:], sep)
		bp += copy(b[bp:], s)
	}
	return string(b)
}

// HasPrefix tests whether the string s begins with prefix.
445 func HasPrefix(s, prefix string) bool { 446 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 447 } 448 449 // HasSuffix tests whether the string s ends with suffix. 450 func HasSuffix(s, suffix string) bool { 451 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 452 } 453 454 // Map returns a copy of the string s with all its characters modified 455 // according to the mapping function. If mapping returns a negative value, the character is 456 // dropped from the string with no replacement. 457 func Map(mapping func(rune) rune, s string) string { 458 // In the worst case, the string can grow when mapped, making 459 // things unpleasant. But it's so rare we barge in assuming it's 460 // fine. It could also shrink but that falls out naturally. 461 462 // The output buffer b is initialized on demand, the first 463 // time a character differs. 464 var b []byte 465 // nbytes is the number of bytes encoded in b. 466 var nbytes int 467 468 for i, c := range s { 469 r := mapping(c) 470 if r == c { 471 continue 472 } 473 474 b = make([]byte, len(s)+utf8.UTFMax) 475 nbytes = copy(b, s[:i]) 476 if r >= 0 { 477 if r < utf8.RuneSelf { 478 b[nbytes] = byte(r) 479 nbytes++ 480 } else { 481 nbytes += utf8.EncodeRune(b[nbytes:], r) 482 } 483 } 484 485 if c == utf8.RuneError { 486 // RuneError is the result of either decoding 487 // an invalid sequence or '\uFFFD'. Determine 488 // the correct number of bytes we need to advance. 489 _, w := utf8.DecodeRuneInString(s[i:]) 490 i += w 491 } else { 492 i += utf8.RuneLen(c) 493 } 494 495 s = s[i:] 496 break 497 } 498 499 if b == nil { 500 return s 501 } 502 503 for _, c := range s { 504 r := mapping(c) 505 506 // common case 507 if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) { 508 b[nbytes] = byte(r) 509 nbytes++ 510 continue 511 } 512 513 // b is not big enough or r is not a ASCII rune. 514 if r >= 0 { 515 if nbytes+utf8.UTFMax >= len(b) { 516 // Grow the buffer. 
517 nb := make([]byte, 2*len(b)) 518 copy(nb, b[:nbytes]) 519 b = nb 520 } 521 nbytes += utf8.EncodeRune(b[nbytes:], r) 522 } 523 } 524 525 return string(b[:nbytes]) 526 } 527 528 // Repeat returns a new string consisting of count copies of the string s. 529 // 530 // It panics if count is negative or if 531 // the result of (len(s) * count) overflows. 532 func Repeat(s string, count int) string { 533 // Since we cannot return an error on overflow, 534 // we should panic if the repeat will generate 535 // an overflow. 536 // See Issue golang.org/issue/16237 537 if count < 0 { 538 panic("strings: negative Repeat count") 539 } else if count > 0 && len(s)*count/count != len(s) { 540 panic("strings: Repeat count causes overflow") 541 } 542 543 b := make([]byte, len(s)*count) 544 bp := copy(b, s) 545 for bp < len(b) { 546 copy(b[bp:], b[:bp]) 547 bp *= 2 548 } 549 return string(b) 550 } 551 552 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case. 553 func ToUpper(s string) string { 554 isASCII, hasLower := true, false 555 for i := 0; i < len(s); i++ { 556 c := s[i] 557 if c >= utf8.RuneSelf { 558 isASCII = false 559 break 560 } 561 hasLower = hasLower || (c >= 'a' && c <= 'z') 562 } 563 564 if isASCII { // optimize for ASCII-only strings. 565 if !hasLower { 566 return s 567 } 568 b := make([]byte, len(s)) 569 for i := 0; i < len(s); i++ { 570 c := s[i] 571 if c >= 'a' && c <= 'z' { 572 c -= 'a' - 'A' 573 } 574 b[i] = c 575 } 576 return string(b) 577 } 578 return Map(unicode.ToUpper, s) 579 } 580 581 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case. 582 func ToLower(s string) string { 583 isASCII, hasUpper := true, false 584 for i := 0; i < len(s); i++ { 585 c := s[i] 586 if c >= utf8.RuneSelf { 587 isASCII = false 588 break 589 } 590 hasUpper = hasUpper || (c >= 'A' && c <= 'Z') 591 } 592 593 if isASCII { // optimize for ASCII-only strings. 
594 if !hasUpper { 595 return s 596 } 597 b := make([]byte, len(s)) 598 for i := 0; i < len(s); i++ { 599 c := s[i] 600 if c >= 'A' && c <= 'Z' { 601 c += 'a' - 'A' 602 } 603 b[i] = c 604 } 605 return string(b) 606 } 607 return Map(unicode.ToLower, s) 608 } 609 610 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case. 611 func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 612 613 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 614 // upper case, giving priority to the special casing rules. 615 func ToUpperSpecial(c unicode.SpecialCase, s string) string { 616 return Map(func(r rune) rune { return c.ToUpper(r) }, s) 617 } 618 619 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 620 // lower case, giving priority to the special casing rules. 621 func ToLowerSpecial(c unicode.SpecialCase, s string) string { 622 return Map(func(r rune) rune { return c.ToLower(r) }, s) 623 } 624 625 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 626 // title case, giving priority to the special casing rules. 627 func ToTitleSpecial(c unicode.SpecialCase, s string) string { 628 return Map(func(r rune) rune { return c.ToTitle(r) }, s) 629 } 630 631 // isSeparator reports whether the rune could mark a word boundary. 632 // TODO: update when package unicode captures more of the properties. 633 func isSeparator(r rune) bool { 634 // ASCII alphanumerics and underscore are not separators 635 if r <= 0x7F { 636 switch { 637 case '0' <= r && r <= '9': 638 return false 639 case 'a' <= r && r <= 'z': 640 return false 641 case 'A' <= r && r <= 'Z': 642 return false 643 case r == '_': 644 return false 645 } 646 return true 647 } 648 // Letters and digits are not separators 649 if unicode.IsLetter(r) || unicode.IsDigit(r) { 650 return false 651 } 652 // Otherwise, all we can do for now is treat spaces as separators. 
653 return unicode.IsSpace(r) 654 } 655 656 // Title returns a copy of the string s with all Unicode letters that begin words 657 // mapped to their title case. 658 // 659 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 660 func Title(s string) string { 661 // Use a closure here to remember state. 662 // Hackish but effective. Depends on Map scanning in order and calling 663 // the closure once per rune. 664 prev := ' ' 665 return Map( 666 func(r rune) rune { 667 if isSeparator(prev) { 668 prev = r 669 return unicode.ToTitle(r) 670 } 671 prev = r 672 return r 673 }, 674 s) 675 } 676 677 // TrimLeftFunc returns a slice of the string s with all leading 678 // Unicode code points c satisfying f(c) removed. 679 func TrimLeftFunc(s string, f func(rune) bool) string { 680 i := indexFunc(s, f, false) 681 if i == -1 { 682 return "" 683 } 684 return s[i:] 685 } 686 687 // TrimRightFunc returns a slice of the string s with all trailing 688 // Unicode code points c satisfying f(c) removed. 689 func TrimRightFunc(s string, f func(rune) bool) string { 690 i := lastIndexFunc(s, f, false) 691 if i >= 0 && s[i] >= utf8.RuneSelf { 692 _, wid := utf8.DecodeRuneInString(s[i:]) 693 i += wid 694 } else { 695 i++ 696 } 697 return s[0:i] 698 } 699 700 // TrimFunc returns a slice of the string s with all leading 701 // and trailing Unicode code points c satisfying f(c) removed. 702 func TrimFunc(s string, f func(rune) bool) string { 703 return TrimRightFunc(TrimLeftFunc(s, f), f) 704 } 705 706 // IndexFunc returns the index into s of the first Unicode 707 // code point satisfying f(c), or -1 if none do. 708 func IndexFunc(s string, f func(rune) bool) int { 709 return indexFunc(s, f, true) 710 } 711 712 // LastIndexFunc returns the index into s of the last 713 // Unicode code point satisfying f(c), or -1 if none do. 
func LastIndexFunc(s string, f func(rune) bool) int {
	return lastIndexFunc(s, f, true)
}

// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func indexFunc(s string, f func(rune) bool, truth bool) int {
	for i, r := range s {
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// Walk s backwards one rune at a time; i is the byte offset of the
	// rune just decoded once size has been subtracted.
	for i := len(s); i > 0; {
		r, size := utf8.DecodeLastRuneInString(s[0:i])
		i -= size
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The 128-bits of the lower 16 bytes,
// starting with the least-significant bit of the lowest word to the
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII.
//
// On the first non-ASCII byte it returns immediately with ok == false; the
// set returned in that case holds only the bytes seen before it.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		// Word index is c/32, bit index within the word is c%32.
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// contains reports whether c is inside the set.
766 func (as *asciiSet) contains(c byte) bool { 767 return (as[c>>5] & (1 << uint(c&31))) != 0 768 } 769 770 func makeCutsetFunc(cutset string) func(rune) bool { 771 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 772 return func(r rune) bool { 773 return r == rune(cutset[0]) 774 } 775 } 776 if as, isASCII := makeASCIISet(cutset); isASCII { 777 return func(r rune) bool { 778 return r < utf8.RuneSelf && as.contains(byte(r)) 779 } 780 } 781 return func(r rune) bool { return IndexRune(cutset, r) >= 0 } 782 } 783 784 // Trim returns a slice of the string s with all leading and 785 // trailing Unicode code points contained in cutset removed. 786 func Trim(s string, cutset string) string { 787 if s == "" || cutset == "" { 788 return s 789 } 790 return TrimFunc(s, makeCutsetFunc(cutset)) 791 } 792 793 // TrimLeft returns a slice of the string s with all leading 794 // Unicode code points contained in cutset removed. 795 func TrimLeft(s string, cutset string) string { 796 if s == "" || cutset == "" { 797 return s 798 } 799 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 800 } 801 802 // TrimRight returns a slice of the string s, with all trailing 803 // Unicode code points contained in cutset removed. 804 func TrimRight(s string, cutset string) string { 805 if s == "" || cutset == "" { 806 return s 807 } 808 return TrimRightFunc(s, makeCutsetFunc(cutset)) 809 } 810 811 // TrimSpace returns a slice of the string s, with all leading 812 // and trailing white space removed, as defined by Unicode. 813 func TrimSpace(s string) string { 814 return TrimFunc(s, unicode.IsSpace) 815 } 816 817 // TrimPrefix returns s without the provided leading prefix string. 818 // If s doesn't start with prefix, s is returned unchanged. 819 func TrimPrefix(s, prefix string) string { 820 if HasPrefix(s, prefix) { 821 return s[len(prefix):] 822 } 823 return s 824 } 825 826 // TrimSuffix returns s without the provided trailing suffix string. 
827 // If s doesn't end with suffix, s is returned unchanged. 828 func TrimSuffix(s, suffix string) string { 829 if HasSuffix(s, suffix) { 830 return s[:len(s)-len(suffix)] 831 } 832 return s 833 } 834 835 // Replace returns a copy of the string s with the first n 836 // non-overlapping instances of old replaced by new. 837 // If old is empty, it matches at the beginning of the string 838 // and after each UTF-8 sequence, yielding up to k+1 replacements 839 // for a k-rune string. 840 // If n < 0, there is no limit on the number of replacements. 841 func Replace(s, old, new string, n int) string { 842 if old == new || n == 0 { 843 return s // avoid allocation 844 } 845 846 // Compute number of replacements. 847 if m := Count(s, old); m == 0 { 848 return s // avoid allocation 849 } else if n < 0 || m < n { 850 n = m 851 } 852 853 // Apply replacements to buffer. 854 t := make([]byte, len(s)+n*(len(new)-len(old))) 855 w := 0 856 start := 0 857 for i := 0; i < n; i++ { 858 j := start 859 if len(old) == 0 { 860 if i > 0 { 861 _, wid := utf8.DecodeRuneInString(s[start:]) 862 j += wid 863 } 864 } else { 865 j += Index(s[start:], old) 866 } 867 w += copy(t[w:], s[start:j]) 868 w += copy(t[w:], new) 869 start = j + len(old) 870 } 871 w += copy(t[w:], s[start:]) 872 return string(t[0:w]) 873 } 874 875 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 876 // are equal under Unicode case-folding. 877 func EqualFold(s, t string) bool { 878 for s != "" && t != "" { 879 // Extract first rune from each string. 880 var sr, tr rune 881 if s[0] < utf8.RuneSelf { 882 sr, s = rune(s[0]), s[1:] 883 } else { 884 r, size := utf8.DecodeRuneInString(s) 885 sr, s = r, s[size:] 886 } 887 if t[0] < utf8.RuneSelf { 888 tr, t = rune(t[0]), t[1:] 889 } else { 890 r, size := utf8.DecodeRuneInString(t) 891 tr, t = r, t[size:] 892 } 893 894 // If they match, keep going; if not, return false. 895 896 // Easy case. 
897 if tr == sr { 898 continue 899 } 900 901 // Make sr < tr to simplify what follows. 902 if tr < sr { 903 tr, sr = sr, tr 904 } 905 // Fast check for ASCII. 906 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { 907 // ASCII, and sr is upper case. tr must be lower case. 908 if tr == sr+'a'-'A' { 909 continue 910 } 911 return false 912 } 913 914 // General case. SimpleFold(x) returns the next equivalent rune > x 915 // or wraps around to smaller values. 916 r := unicode.SimpleFold(sr) 917 for r != sr && r < tr { 918 r = unicode.SimpleFold(r) 919 } 920 if r == tr { 921 continue 922 } 923 return false 924 } 925 926 // One string is empty. Are both? 927 return s == t 928 } 929 930 func indexRabinKarp(s, substr string) int { 931 // Rabin-Karp search 932 hashss, pow := hashStr(substr) 933 n := len(substr) 934 var h uint32 935 for i := 0; i < n; i++ { 936 h = h*primeRK + uint32(s[i]) 937 } 938 if h == hashss && s[:n] == substr { 939 return 0 940 } 941 for i := n; i < len(s); { 942 h *= primeRK 943 h += uint32(s[i]) 944 h -= pow * uint32(s[i-n]) 945 i++ 946 if h == hashss && s[i-n:i] == substr { 947 return i - n 948 } 949 } 950 return -1 951 952 }