github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/strings/strings.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package strings implements simple functions to manipulate strings. 6 package strings 7 8 import ( 9 "unicode" 10 "unicode/utf8" 11 ) 12 13 // explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n < 0 means no limit). 14 // Invalid UTF-8 sequences become correct encodings of U+FFF8. 15 func explode(s string, n int) []string { 16 if n == 0 { 17 return nil 18 } 19 l := utf8.RuneCountInString(s) 20 if n <= 0 || n > l { 21 n = l 22 } 23 a := make([]string, n) 24 var size int 25 var ch rune 26 i, cur := 0, 0 27 for ; i+1 < n; i++ { 28 ch, size = utf8.DecodeRuneInString(s[cur:]) 29 if ch == utf8.RuneError { 30 a[i] = string(utf8.RuneError) 31 } else { 32 a[i] = s[cur : cur+size] 33 } 34 cur += size 35 } 36 // add the rest, if there is any 37 if cur < len(s) { 38 a[i] = s[cur:] 39 } 40 return a 41 } 42 43 // primeRK is the prime base used in Rabin-Karp algorithm. 44 const primeRK = 16777619 45 46 // hashstr returns the hash and the appropriate multiplicative 47 // factor for use in Rabin-Karp algorithm. 48 func hashstr(sep string) (uint32, uint32) { 49 hash := uint32(0) 50 for i := 0; i < len(sep); i++ { 51 hash = hash*primeRK + uint32(sep[i]) 52 53 } 54 var pow, sq uint32 = 1, primeRK 55 for i := len(sep); i > 0; i >>= 1 { 56 if i&1 != 0 { 57 pow *= sq 58 } 59 sq *= sq 60 } 61 return hash, pow 62 } 63 64 // Count counts the number of non-overlapping instances of sep in s. 65 func Count(s, sep string) int { 66 n := 0 67 // special cases 68 switch { 69 case len(sep) == 0: 70 return utf8.RuneCountInString(s) + 1 71 case len(sep) == 1: 72 // special case worth making fast 73 c := sep[0] 74 for i := 0; i < len(s); i++ { 75 if s[i] == c { 76 n++ 77 } 78 } 79 return n 80 case len(sep) > len(s): 81 return 0 82 case len(sep) == len(s): 83 if sep == s { 84 return 1 85 } 86 return 0 87 } 88 hashsep, pow := hashstr(sep) 89 h := uint32(0) 90 for i := 0; i < len(sep); i++ { 91 h = h*primeRK + uint32(s[i]) 92 } 93 lastmatch := 0 94 if h == hashsep && s[:len(sep)] == sep { 95 n++ 96 lastmatch = len(sep) 97 } 98 for i := len(sep); i < len(s); { 99 h *= primeRK 100 h += uint32(s[i]) 101 h -= pow * uint32(s[i-len(sep)]) 102 i++ 103 if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep { 104 n++ 105 lastmatch = i 106 } 107 } 108 return n 109 } 110 111 // Contains returns true if substr is within s. 112 func Contains(s, substr string) bool { 113 return Index(s, substr) >= 0 114 } 115 116 // ContainsAny returns true if any Unicode code points in chars are within s. 117 func ContainsAny(s, chars string) bool { 118 return IndexAny(s, chars) >= 0 119 } 120 121 // ContainsRune returns true if the Unicode code point r is within s. 122 func ContainsRune(s string, r rune) bool { 123 return IndexRune(s, r) >= 0 124 } 125 126 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. 127 func Index(s, sep string) int { 128 n := len(sep) 129 switch { 130 case n == 0: 131 return 0 132 case n == 1: 133 return IndexByte(s, sep[0]) 134 case n == len(s): 135 if sep == s { 136 return 0 137 } 138 return -1 139 case n > len(s): 140 return -1 141 } 142 // Hash sep. 143 hashsep, pow := hashstr(sep) 144 var h uint32 145 for i := 0; i < n; i++ { 146 h = h*primeRK + uint32(s[i]) 147 } 148 if h == hashsep && s[:n] == sep { 149 return 0 150 } 151 for i := n; i < len(s); { 152 h *= primeRK 153 h += uint32(s[i]) 154 h -= pow * uint32(s[i-n]) 155 i++ 156 if h == hashsep && s[i-n:i] == sep { 157 return i - n 158 } 159 } 160 return -1 161 } 162 163 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. 164 func LastIndex(s, sep string) int { 165 n := len(sep) 166 if n == 0 { 167 return len(s) 168 } 169 c := sep[0] 170 if n == 1 { 171 // special case worth making fast 172 for i := len(s) - 1; i >= 0; i-- { 173 if s[i] == c { 174 return i 175 } 176 } 177 return -1 178 } 179 // n > 1 180 for i := len(s) - n; i >= 0; i-- { 181 if s[i] == c && s[i:i+n] == sep { 182 return i 183 } 184 } 185 return -1 186 } 187 188 // IndexRune returns the index of the first instance of the Unicode code point 189 // r, or -1 if rune is not present in s. 190 func IndexRune(s string, r rune) int { 191 switch { 192 case r < 0x80: 193 b := byte(r) 194 for i := 0; i < len(s); i++ { 195 if s[i] == b { 196 return i 197 } 198 } 199 default: 200 for i, c := range s { 201 if c == r { 202 return i 203 } 204 } 205 } 206 return -1 207 } 208 209 // IndexAny returns the index of the first instance of any Unicode code point 210 // from chars in s, or -1 if no Unicode code point from chars is present in s. 211 func IndexAny(s, chars string) int { 212 if len(chars) > 0 { 213 for i, c := range s { 214 for _, m := range chars { 215 if c == m { 216 return i 217 } 218 } 219 } 220 } 221 return -1 222 } 223 224 // LastIndexAny returns the index of the last instance of any Unicode code 225 // point from chars in s, or -1 if no Unicode code point from chars is 226 // present in s. 227 func LastIndexAny(s, chars string) int { 228 if len(chars) > 0 { 229 for i := len(s); i > 0; { 230 rune, size := utf8.DecodeLastRuneInString(s[0:i]) 231 i -= size 232 for _, m := range chars { 233 if rune == m { 234 return i 235 } 236 } 237 } 238 } 239 return -1 240 } 241 242 // Generic split: splits after each instance of sep, 243 // including sepSave bytes of sep in the subarrays. 244 func genSplit(s, sep string, sepSave, n int) []string { 245 if n == 0 { 246 return nil 247 } 248 if sep == "" { 249 return explode(s, n) 250 } 251 if n < 0 { 252 n = Count(s, sep) + 1 253 } 254 c := sep[0] 255 start := 0 256 a := make([]string, n) 257 na := 0 258 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { 259 if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { 260 a[na] = s[start : i+sepSave] 261 na++ 262 start = i + len(sep) 263 i += len(sep) - 1 264 } 265 } 266 a[na] = s[start:] 267 return a[0 : na+1] 268 } 269 270 // SplitN slices s into substrings separated by sep and returns a slice of 271 // the substrings between those separators. 272 // If sep is empty, SplitN splits after each UTF-8 sequence. 273 // The count determines the number of substrings to return: 274 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 275 // n == 0: the result is nil (zero substrings) 276 // n < 0: all substrings 277 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } 278 279 // SplitAfterN slices s into substrings after each instance of sep and 280 // returns a slice of those substrings. 281 // If sep is empty, SplitAfterN splits after each UTF-8 sequence. 282 // The count determines the number of substrings to return: 283 // n > 0: at most n substrings; the last substring will be the unsplit remainder. 284 // n == 0: the result is nil (zero substrings) 285 // n < 0: all substrings 286 func SplitAfterN(s, sep string, n int) []string { 287 return genSplit(s, sep, len(sep), n) 288 } 289 290 // Split slices s into all substrings separated by sep and returns a slice of 291 // the substrings between those separators. 292 // If sep is empty, Split splits after each UTF-8 sequence. 293 // It is equivalent to SplitN with a count of -1. 294 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) } 295 296 // SplitAfter slices s into all substrings after each instance of sep and 297 // returns a slice of those substrings. 298 // If sep is empty, SplitAfter splits after each UTF-8 sequence. 299 // It is equivalent to SplitAfterN with a count of -1. 300 func SplitAfter(s, sep string) []string { 301 return genSplit(s, sep, len(sep), -1) 302 } 303 304 // Fields splits the string s around each instance of one or more consecutive white space 305 // characters, as defined by unicode.IsSpace, returning an array of substrings of s or an 306 // empty list if s contains only white space. 307 func Fields(s string) []string { 308 return FieldsFunc(s, unicode.IsSpace) 309 } 310 311 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) 312 // and returns an array of slices of s. If all code points in s satisfy f(c) or the 313 // string is empty, an empty slice is returned. 314 func FieldsFunc(s string, f func(rune) bool) []string { 315 // First count the fields. 316 n := 0 317 inField := false 318 for _, rune := range s { 319 wasInField := inField 320 inField = !f(rune) 321 if inField && !wasInField { 322 n++ 323 } 324 } 325 326 // Now create them. 327 a := make([]string, n) 328 na := 0 329 fieldStart := -1 // Set to -1 when looking for start of field. 330 for i, rune := range s { 331 if f(rune) { 332 if fieldStart >= 0 { 333 a[na] = s[fieldStart:i] 334 na++ 335 fieldStart = -1 336 } 337 } else if fieldStart == -1 { 338 fieldStart = i 339 } 340 } 341 if fieldStart >= 0 { // Last field might end at EOF. 342 a[na] = s[fieldStart:] 343 } 344 return a 345 } 346 347 // Join concatenates the elements of a to create a single string. The separator string 348 // sep is placed between elements in the resulting string. 349 func Join(a []string, sep string) string { 350 if len(a) == 0 { 351 return "" 352 } 353 if len(a) == 1 { 354 return a[0] 355 } 356 n := len(sep) * (len(a) - 1) 357 for i := 0; i < len(a); i++ { 358 n += len(a[i]) 359 } 360 361 b := make([]byte, n) 362 bp := copy(b, a[0]) 363 for _, s := range a[1:] { 364 bp += copy(b[bp:], sep) 365 bp += copy(b[bp:], s) 366 } 367 return string(b) 368 } 369 370 // HasPrefix tests whether the string s begins with prefix. 371 func HasPrefix(s, prefix string) bool { 372 return len(s) >= len(prefix) && s[0:len(prefix)] == prefix 373 } 374 375 // HasSuffix tests whether the string s ends with suffix. 376 func HasSuffix(s, suffix string) bool { 377 return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix 378 } 379 380 // Map returns a copy of the string s with all its characters modified 381 // according to the mapping function. If mapping returns a negative value, the character is 382 // dropped from the string with no replacement. 383 func Map(mapping func(rune) rune, s string) string { 384 // In the worst case, the string can grow when mapped, making 385 // things unpleasant. But it's so rare we barge in assuming it's 386 // fine. It could also shrink but that falls out naturally. 387 maxbytes := len(s) // length of b 388 nbytes := 0 // number of bytes encoded in b 389 // The output buffer b is initialized on demand, the first 390 // time a character differs. 391 var b []byte 392 393 for i, c := range s { 394 r := mapping(c) 395 if b == nil { 396 if r == c { 397 continue 398 } 399 b = make([]byte, maxbytes) 400 nbytes = copy(b, s[:i]) 401 } 402 if r >= 0 { 403 wid := 1 404 if r >= utf8.RuneSelf { 405 wid = utf8.RuneLen(r) 406 } 407 if nbytes+wid > maxbytes { 408 // Grow the buffer. 409 maxbytes = maxbytes*2 + utf8.UTFMax 410 nb := make([]byte, maxbytes) 411 copy(nb, b[0:nbytes]) 412 b = nb 413 } 414 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) 415 } 416 } 417 if b == nil { 418 return s 419 } 420 return string(b[0:nbytes]) 421 } 422 423 // Repeat returns a new string consisting of count copies of the string s. 424 func Repeat(s string, count int) string { 425 b := make([]byte, len(s)*count) 426 bp := 0 427 for i := 0; i < count; i++ { 428 bp += copy(b[bp:], s) 429 } 430 return string(b) 431 } 432 433 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case. 434 func ToUpper(s string) string { return Map(unicode.ToUpper, s) } 435 436 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case. 437 func ToLower(s string) string { return Map(unicode.ToLower, s) } 438 439 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case. 440 func ToTitle(s string) string { return Map(unicode.ToTitle, s) } 441 442 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their 443 // upper case, giving priority to the special casing rules. 444 func ToUpperSpecial(_case unicode.SpecialCase, s string) string { 445 return Map(func(r rune) rune { return _case.ToUpper(r) }, s) 446 } 447 448 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their 449 // lower case, giving priority to the special casing rules. 450 func ToLowerSpecial(_case unicode.SpecialCase, s string) string { 451 return Map(func(r rune) rune { return _case.ToLower(r) }, s) 452 } 453 454 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their 455 // title case, giving priority to the special casing rules. 456 func ToTitleSpecial(_case unicode.SpecialCase, s string) string { 457 return Map(func(r rune) rune { return _case.ToTitle(r) }, s) 458 } 459 460 // isSeparator reports whether the rune could mark a word boundary. 461 // TODO: update when package unicode captures more of the properties. 462 func isSeparator(r rune) bool { 463 // ASCII alphanumerics and underscore are not separators 464 if r <= 0x7F { 465 switch { 466 case '0' <= r && r <= '9': 467 return false 468 case 'a' <= r && r <= 'z': 469 return false 470 case 'A' <= r && r <= 'Z': 471 return false 472 case r == '_': 473 return false 474 } 475 return true 476 } 477 // Letters and digits are not separators 478 if unicode.IsLetter(r) || unicode.IsDigit(r) { 479 return false 480 } 481 // Otherwise, all we can do for now is treat spaces as separators. 482 return unicode.IsSpace(r) 483 } 484 485 // Title returns a copy of the string s with all Unicode letters that begin words 486 // mapped to their title case. 487 // 488 // BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly. 489 func Title(s string) string { 490 // Use a closure here to remember state. 491 // Hackish but effective. Depends on Map scanning in order and calling 492 // the closure once per rune. 493 prev := ' ' 494 return Map( 495 func(r rune) rune { 496 if isSeparator(prev) { 497 prev = r 498 return unicode.ToTitle(r) 499 } 500 prev = r 501 return r 502 }, 503 s) 504 } 505 506 // TrimLeftFunc returns a slice of the string s with all leading 507 // Unicode code points c satisfying f(c) removed. 508 func TrimLeftFunc(s string, f func(rune) bool) string { 509 i := indexFunc(s, f, false) 510 if i == -1 { 511 return "" 512 } 513 return s[i:] 514 } 515 516 // TrimRightFunc returns a slice of the string s with all trailing 517 // Unicode code points c satisfying f(c) removed. 518 func TrimRightFunc(s string, f func(rune) bool) string { 519 i := lastIndexFunc(s, f, false) 520 if i >= 0 && s[i] >= utf8.RuneSelf { 521 _, wid := utf8.DecodeRuneInString(s[i:]) 522 i += wid 523 } else { 524 i++ 525 } 526 return s[0:i] 527 } 528 529 // TrimFunc returns a slice of the string s with all leading 530 // and trailing Unicode code points c satisfying f(c) removed. 531 func TrimFunc(s string, f func(rune) bool) string { 532 return TrimRightFunc(TrimLeftFunc(s, f), f) 533 } 534 535 // IndexFunc returns the index into s of the first Unicode 536 // code point satisfying f(c), or -1 if none do. 537 func IndexFunc(s string, f func(rune) bool) int { 538 return indexFunc(s, f, true) 539 } 540 541 // LastIndexFunc returns the index into s of the last 542 // Unicode code point satisfying f(c), or -1 if none do. 543 func LastIndexFunc(s string, f func(rune) bool) int { 544 return lastIndexFunc(s, f, true) 545 } 546 547 // indexFunc is the same as IndexFunc except that if 548 // truth==false, the sense of the predicate function is 549 // inverted. 550 func indexFunc(s string, f func(rune) bool, truth bool) int { 551 start := 0 552 for start < len(s) { 553 wid := 1 554 r := rune(s[start]) 555 if r >= utf8.RuneSelf { 556 r, wid = utf8.DecodeRuneInString(s[start:]) 557 } 558 if f(r) == truth { 559 return start 560 } 561 start += wid 562 } 563 return -1 564 } 565 566 // lastIndexFunc is the same as LastIndexFunc except that if 567 // truth==false, the sense of the predicate function is 568 // inverted. 569 func lastIndexFunc(s string, f func(rune) bool, truth bool) int { 570 for i := len(s); i > 0; { 571 r, size := utf8.DecodeLastRuneInString(s[0:i]) 572 i -= size 573 if f(r) == truth { 574 return i 575 } 576 } 577 return -1 578 } 579 580 func makeCutsetFunc(cutset string) func(rune) bool { 581 return func(r rune) bool { return IndexRune(cutset, r) >= 0 } 582 } 583 584 // Trim returns a slice of the string s with all leading and 585 // trailing Unicode code points contained in cutset removed. 586 func Trim(s string, cutset string) string { 587 if s == "" || cutset == "" { 588 return s 589 } 590 return TrimFunc(s, makeCutsetFunc(cutset)) 591 } 592 593 // TrimLeft returns a slice of the string s with all leading 594 // Unicode code points contained in cutset removed. 595 func TrimLeft(s string, cutset string) string { 596 if s == "" || cutset == "" { 597 return s 598 } 599 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 600 } 601 602 // TrimRight returns a slice of the string s, with all trailing 603 // Unicode code points contained in cutset removed. 604 func TrimRight(s string, cutset string) string { 605 if s == "" || cutset == "" { 606 return s 607 } 608 return TrimRightFunc(s, makeCutsetFunc(cutset)) 609 } 610 611 // TrimSpace returns a slice of the string s, with all leading 612 // and trailing white space removed, as defined by Unicode. 613 func TrimSpace(s string) string { 614 return TrimFunc(s, unicode.IsSpace) 615 } 616 617 // TrimPrefix returns s without the provided leading prefix string. 618 // If s doesn't start with prefix, s is returned unchanged. 619 func TrimPrefix(s, prefix string) string { 620 if HasPrefix(s, prefix) { 621 return s[len(prefix):] 622 } 623 return s 624 } 625 626 // TrimSuffix returns s without the provided trailing suffix string. 627 // If s doesn't end with suffix, s is returned unchanged. 628 func TrimSuffix(s, suffix string) string { 629 if HasSuffix(s, suffix) { 630 return s[:len(s)-len(suffix)] 631 } 632 return s 633 } 634 635 // Replace returns a copy of the string s with the first n 636 // non-overlapping instances of old replaced by new. 637 // If n < 0, there is no limit on the number of replacements. 638 func Replace(s, old, new string, n int) string { 639 if old == new || n == 0 { 640 return s // avoid allocation 641 } 642 643 // Compute number of replacements. 644 if m := Count(s, old); m == 0 { 645 return s // avoid allocation 646 } else if n < 0 || m < n { 647 n = m 648 } 649 650 // Apply replacements to buffer. 651 t := make([]byte, len(s)+n*(len(new)-len(old))) 652 w := 0 653 start := 0 654 for i := 0; i < n; i++ { 655 j := start 656 if len(old) == 0 { 657 if i > 0 { 658 _, wid := utf8.DecodeRuneInString(s[start:]) 659 j += wid 660 } 661 } else { 662 j += Index(s[start:], old) 663 } 664 w += copy(t[w:], s[start:j]) 665 w += copy(t[w:], new) 666 start = j + len(old) 667 } 668 w += copy(t[w:], s[start:]) 669 return string(t[0:w]) 670 } 671 672 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 673 // are equal under Unicode case-folding. 674 func EqualFold(s, t string) bool { 675 for s != "" && t != "" { 676 // Extract first rune from each string. 677 var sr, tr rune 678 if s[0] < utf8.RuneSelf { 679 sr, s = rune(s[0]), s[1:] 680 } else { 681 r, size := utf8.DecodeRuneInString(s) 682 sr, s = r, s[size:] 683 } 684 if t[0] < utf8.RuneSelf { 685 tr, t = rune(t[0]), t[1:] 686 } else { 687 r, size := utf8.DecodeRuneInString(t) 688 tr, t = r, t[size:] 689 } 690 691 // If they match, keep going; if not, return false. 692 693 // Easy case. 694 if tr == sr { 695 continue 696 } 697 698 // Make sr < tr to simplify what follows. 699 if tr < sr { 700 tr, sr = sr, tr 701 } 702 // Fast check for ASCII. 703 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { 704 // ASCII, and sr is upper case. tr must be lower case. 705 if tr == sr+'a'-'A' { 706 continue 707 } 708 return false 709 } 710 711 // General case. SimpleFold(x) returns the next equivalent rune > x 712 // or wraps around to smaller values. 713 r := unicode.SimpleFold(sr) 714 for r != sr && r < tr { 715 r = unicode.SimpleFold(r) 716 } 717 if r == tr { 718 continue 719 } 720 return false 721 } 722 723 // One string is empty. Are both? 724 return s == t 725 }