github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/gnovm/stdlibs/bytes/bytes.gno (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bytes implements functions for the manipulation of byte slices. 6 // It is analogous to the facilities of the strings package. 7 package bytes 8 9 import ( 10 "unicode" 11 "unicode/utf8" 12 13 "internal/bytealg" 14 ) 15 16 // Equal reports whether a and b 17 // are the same length and contain the same bytes. 18 // A nil argument is equivalent to an empty slice. 19 func Equal(a, b []byte) bool { 20 // Neither cmd/compile nor gccgo allocates for these string conversions. 21 return string(a) == string(b) 22 } 23 24 // Compare returns an integer comparing two byte slices lexicographically. 25 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 26 // A nil argument is equivalent to an empty slice. 27 func Compare(a, b []byte) int { 28 return bytealg.Compare(a, b) 29 } 30 31 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes), 32 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes. 33 func explode(s []byte, n int) [][]byte { 34 if n <= 0 { 35 n = len(s) 36 } 37 a := make([][]byte, n) 38 var size int 39 na := 0 40 for len(s) > 0 { 41 if na+1 >= n { 42 a[na] = s 43 na++ 44 break 45 } 46 _, size = utf8.DecodeRune(s) 47 a[na] = s[0:size:size] 48 s = s[size:] 49 na++ 50 } 51 return a[0:na] 52 } 53 54 // Count counts the number of non-overlapping instances of sep in s. 55 // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. 
56 func Count(s, sep []byte) int { 57 // special case 58 if len(sep) == 0 { 59 return utf8.RuneCount(s) + 1 60 } 61 if len(sep) == 1 { 62 return bytealg.Count(s, sep[0]) 63 } 64 n := 0 65 for { 66 i := Index(s, sep) 67 if i == -1 { 68 return n 69 } 70 n++ 71 s = s[i+len(sep):] 72 } 73 } 74 75 // Contains reports whether subslice is within b. 76 func Contains(b, subslice []byte) bool { 77 return Index(b, subslice) != -1 78 } 79 80 // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. 81 func ContainsAny(b []byte, chars string) bool { 82 return IndexAny(b, chars) >= 0 83 } 84 85 // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. 86 func ContainsRune(b []byte, r rune) bool { 87 return IndexRune(b, r) >= 0 88 } 89 90 // IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b. 91 func IndexByte(b []byte, c byte) int { 92 return bytealg.IndexByte(b, c) 93 } 94 95 func indexBytePortable(s []byte, c byte) int { 96 for i, b := range s { 97 if b == c { 98 return i 99 } 100 } 101 return -1 102 } 103 104 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. 
105 func LastIndex(s, sep []byte) int { 106 n := len(sep) 107 switch { 108 case n == 0: 109 return len(s) 110 case n == 1: 111 return LastIndexByte(s, sep[0]) 112 case n == len(s): 113 if Equal(s, sep) { 114 return 0 115 } 116 return -1 117 case n > len(s): 118 return -1 119 } 120 // Rabin-Karp search from the end of the string 121 hashss, pow := bytealg.HashStrRevBytes(sep) 122 last := len(s) - n 123 var h uint32 124 for i := len(s) - 1; i >= last; i-- { 125 h = h*bytealg.PrimeRK + uint32(s[i]) 126 } 127 if h == hashss && Equal(s[last:], sep) { 128 return last 129 } 130 for i := last - 1; i >= 0; i-- { 131 h *= bytealg.PrimeRK 132 h += uint32(s[i]) 133 h -= pow * uint32(s[i+n]) 134 if h == hashss && Equal(s[i:i+n], sep) { 135 return i 136 } 137 } 138 return -1 139 } 140 141 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 142 func LastIndexByte(s []byte, c byte) int { 143 for i := len(s) - 1; i >= 0; i-- { 144 if s[i] == c { 145 return i 146 } 147 } 148 return -1 149 } 150 151 // IndexRune interprets s as a sequence of UTF-8-encoded code points. 152 // It returns the byte index of the first occurrence in s of the given rune. 153 // It returns -1 if rune is not present in s. 154 // If r is utf8.RuneError, it returns the first instance of any 155 // invalid UTF-8 byte sequence. 156 func IndexRune(s []byte, r rune) int { 157 switch { 158 case 0 <= r && r < utf8.RuneSelf: 159 return IndexByte(s, byte(r)) 160 case r == utf8.RuneError: 161 for i := 0; i < len(s); { 162 r1, n := utf8.DecodeRune(s[i:]) 163 if r1 == utf8.RuneError { 164 return i 165 } 166 i += n 167 } 168 return -1 169 case !utf8.ValidRune(r): 170 return -1 171 default: 172 var b [utf8.UTFMax]byte 173 n := utf8.EncodeRune(b[:], r) 174 return Index(s, b[:n]) 175 } 176 } 177 178 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. 
179 // It returns the byte index of the first occurrence in s of any of the Unicode 180 // code points in chars. It returns -1 if chars is empty or if there is no code 181 // point in common. 182 func IndexAny(s []byte, chars string) int { 183 if chars == "" { 184 // Avoid scanning all of s. 185 return -1 186 } 187 if len(s) == 1 { 188 r := rune(s[0]) 189 if r >= utf8.RuneSelf { 190 // search utf8.RuneError. 191 for _, r = range chars { 192 if r == utf8.RuneError { 193 return 0 194 } 195 } 196 return -1 197 } 198 if bytealg.IndexByteString(chars, s[0]) >= 0 { 199 return 0 200 } 201 return -1 202 } 203 if len(chars) == 1 { 204 r := rune(chars[0]) 205 if r >= utf8.RuneSelf { 206 r = utf8.RuneError 207 } 208 return IndexRune(s, r) 209 } 210 if len(s) > 8 { 211 if as, isASCII := makeASCIISet(chars); isASCII { 212 for i, c := range s { 213 if as.contains(c) { 214 return i 215 } 216 } 217 return -1 218 } 219 } 220 var width int 221 for i := 0; i < len(s); i += width { 222 r := rune(s[i]) 223 if r < utf8.RuneSelf { 224 if bytealg.IndexByteString(chars, s[i]) >= 0 { 225 return i 226 } 227 width = 1 228 continue 229 } 230 r, width = utf8.DecodeRune(s[i:]) 231 if r != utf8.RuneError { 232 // r is 2 to 4 bytes 233 if len(chars) == width { 234 if chars == string(r) { 235 return i 236 } 237 continue 238 } 239 // Use bytealg.IndexString for performance if available. 240 if bytealg.MaxLen >= width { 241 if bytealg.IndexString(chars, string(r)) >= 0 { 242 return i 243 } 244 continue 245 } 246 } 247 for _, ch := range chars { 248 if r == ch { 249 return i 250 } 251 } 252 } 253 return -1 254 } 255 256 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code 257 // points. It returns the byte index of the last occurrence in s of any of 258 // the Unicode code points in chars. It returns -1 if chars is empty or if 259 // there is no code point in common. 260 func LastIndexAny(s []byte, chars string) int { 261 if chars == "" { 262 // Avoid scanning all of s. 
263 return -1 264 } 265 if len(s) > 8 { 266 if as, isASCII := makeASCIISet(chars); isASCII { 267 for i := len(s) - 1; i >= 0; i-- { 268 if as.contains(s[i]) { 269 return i 270 } 271 } 272 return -1 273 } 274 } 275 if len(s) == 1 { 276 r := rune(s[0]) 277 if r >= utf8.RuneSelf { 278 for _, r = range chars { 279 if r == utf8.RuneError { 280 return 0 281 } 282 } 283 return -1 284 } 285 if bytealg.IndexByteString(chars, s[0]) >= 0 { 286 return 0 287 } 288 return -1 289 } 290 if len(chars) == 1 { 291 cr := rune(chars[0]) 292 if cr >= utf8.RuneSelf { 293 cr = utf8.RuneError 294 } 295 for i := len(s); i > 0; { 296 r, size := utf8.DecodeLastRune(s[:i]) 297 i -= size 298 if r == cr { 299 return i 300 } 301 } 302 return -1 303 } 304 for i := len(s); i > 0; { 305 r := rune(s[i-1]) 306 if r < utf8.RuneSelf { 307 if bytealg.IndexByteString(chars, s[i-1]) >= 0 { 308 return i - 1 309 } 310 i-- 311 continue 312 } 313 r, size := utf8.DecodeLastRune(s[:i]) 314 i -= size 315 if r != utf8.RuneError { 316 // r is 2 to 4 bytes 317 if len(chars) == size { 318 if chars == string(r) { 319 return i 320 } 321 continue 322 } 323 // Use bytealg.IndexString for performance if available. 324 if bytealg.MaxLen >= size { 325 if bytealg.IndexString(chars, string(r)) >= 0 { 326 return i 327 } 328 continue 329 } 330 } 331 for _, ch := range chars { 332 if r == ch { 333 return i 334 } 335 } 336 } 337 return -1 338 } 339 340 // Generic split: splits after each instance of sep, 341 // including sepSave bytes of sep in the subslices. 
342 func genSplit(s, sep []byte, sepSave, n int) [][]byte { 343 if n == 0 { 344 return nil 345 } 346 if len(sep) == 0 { 347 return explode(s, n) 348 } 349 if n < 0 { 350 n = Count(s, sep) + 1 351 } 352 353 a := make([][]byte, n) 354 n-- 355 i := 0 356 for i < n { 357 m := Index(s, sep) 358 if m < 0 { 359 break 360 } 361 a[i] = s[: m+sepSave : m+sepSave] 362 s = s[m+len(sep):] 363 i++ 364 } 365 a[i] = s 366 return a[:i+1] 367 } 368 369 // SplitN slices s into subslices separated by sep and returns a slice of 370 // the subslices between those separators. 371 // If sep is empty, SplitN splits after each UTF-8 sequence. 372 // The count determines the number of subslices to return: 373 // 374 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 375 // n == 0: the result is nil (zero subslices) 376 // n < 0: all subslices 377 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } 378 379 // SplitAfterN slices s into subslices after each instance of sep and 380 // returns a slice of those subslices. 381 // If sep is empty, SplitAfterN splits after each UTF-8 sequence. 382 // The count determines the number of subslices to return: 383 // 384 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 385 // n == 0: the result is nil (zero subslices) 386 // n < 0: all subslices 387 func SplitAfterN(s, sep []byte, n int) [][]byte { 388 return genSplit(s, sep, len(sep), n) 389 } 390 391 // Split slices s into all subslices separated by sep and returns a slice of 392 // the subslices between those separators. 393 // If sep is empty, Split splits after each UTF-8 sequence. 394 // It is equivalent to SplitN with a count of -1. 395 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } 396 397 // SplitAfter slices s into all subslices after each instance of sep and 398 // returns a slice of those subslices. 399 // If sep is empty, SplitAfter splits after each UTF-8 sequence. 
400 // It is equivalent to SplitAfterN with a count of -1. 401 func SplitAfter(s, sep []byte) [][]byte { 402 return genSplit(s, sep, len(sep), -1) 403 } 404 405 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 406 407 // Fields interprets s as a sequence of UTF-8-encoded code points. 408 // It splits the slice s around each instance of one or more consecutive white space 409 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an 410 // empty slice if s contains only white space. 411 func Fields(s []byte) [][]byte { 412 // First count the fields. 413 // This is an exact count if s is ASCII, otherwise it is an approximation. 414 n := 0 415 wasSpace := 1 416 // setBits is used to track which bits are set in the bytes of s. 417 setBits := uint8(0) 418 for i := 0; i < len(s); i++ { 419 r := s[i] 420 setBits |= r 421 isSpace := int(asciiSpace[r]) 422 n += wasSpace & ^isSpace 423 wasSpace = isSpace 424 } 425 426 if setBits >= utf8.RuneSelf { 427 // Some runes in the input slice are not ASCII. 428 return FieldsFunc(s, unicode.IsSpace) 429 } 430 431 // ASCII fast path 432 a := make([][]byte, n) 433 na := 0 434 fieldStart := 0 435 i := 0 436 // Skip spaces in the front of the input. 437 for i < len(s) && asciiSpace[s[i]] != 0 { 438 i++ 439 } 440 fieldStart = i 441 for i < len(s) { 442 if asciiSpace[s[i]] == 0 { 443 i++ 444 continue 445 } 446 a[na] = s[fieldStart:i:i] 447 na++ 448 i++ 449 // Skip spaces in between fields. 450 for i < len(s) && asciiSpace[s[i]] != 0 { 451 i++ 452 } 453 fieldStart = i 454 } 455 if fieldStart < len(s) { // Last field might end at EOF. 456 a[na] = s[fieldStart:len(s):len(s)] 457 } 458 return a 459 } 460 461 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points. 462 // It splits the slice s at each run of code points c satisfying f(c) and 463 // returns a slice of subslices of s. If all code points in s satisfy f(c), or 464 // len(s) == 0, an empty slice is returned. 
465 // 466 // FieldsFunc makes no guarantees about the order in which it calls f(c) 467 // and assumes that f always returns the same value for a given c. 468 func FieldsFunc(s []byte, f func(rune) bool) [][]byte { 469 // A span is used to record a slice of s of the form s[start:end]. 470 // The start index is inclusive and the end index is exclusive. 471 type span struct { 472 start int 473 end int 474 } 475 spans := make([]span, 0, 32) 476 477 // Find the field start and end indices. 478 // Doing this in a separate pass (rather than slicing the string s 479 // and collecting the result substrings right away) is significantly 480 // more efficient, possibly due to cache effects. 481 start := -1 // valid span start if >= 0 482 for i := 0; i < len(s); { 483 size := 1 484 r := rune(s[i]) 485 if r >= utf8.RuneSelf { 486 r, size = utf8.DecodeRune(s[i:]) 487 } 488 if f(r) { 489 if start >= 0 { 490 spans = append(spans, span{start, i}) 491 start = -1 492 } 493 } else { 494 if start < 0 { 495 start = i 496 } 497 } 498 i += size 499 } 500 501 // Last field might end at EOF. 502 if start >= 0 { 503 spans = append(spans, span{start, len(s)}) 504 } 505 506 // Create subslices from recorded field indices. 507 a := make([][]byte, len(spans)) 508 for i, span := range spans { 509 a[i] = s[span.start:span.end:span.end] 510 } 511 512 return a 513 } 514 515 // Join concatenates the elements of s to create a new byte slice. The separator 516 // sep is placed between elements in the resulting slice. 517 func Join(s [][]byte, sep []byte) []byte { 518 if len(s) == 0 { 519 return []byte{} 520 } 521 if len(s) == 1 { 522 // Just return a copy. 523 return append([]byte(nil), s[0]...) 
524 } 525 n := len(sep) * (len(s) - 1) 526 for _, v := range s { 527 n += len(v) 528 } 529 530 b := make([]byte, n) 531 bp := copy(b, s[0]) 532 for _, v := range s[1:] { 533 bp += copy(b[bp:], sep) 534 bp += copy(b[bp:], v) 535 } 536 return b 537 } 538 539 // HasPrefix tests whether the byte slice s begins with prefix. 540 func HasPrefix(s, prefix []byte) bool { 541 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) 542 } 543 544 // HasSuffix tests whether the byte slice s ends with suffix. 545 func HasSuffix(s, suffix []byte) bool { 546 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) 547 } 548 549 // Map returns a copy of the byte slice s with all its characters modified 550 // according to the mapping function. If mapping returns a negative value, the character is 551 // dropped from the byte slice with no replacement. The characters in s and the 552 // output are interpreted as UTF-8-encoded code points. 553 func Map(mapping func(r rune) rune, s []byte) []byte { 554 // In the worst case, the slice can grow when mapped, making 555 // things unpleasant. But it's so rare we barge in assuming it's 556 // fine. It could also shrink but that falls out naturally. 557 maxbytes := len(s) // length of b 558 nbytes := 0 // number of bytes encoded in b 559 b := make([]byte, maxbytes) 560 for i := 0; i < len(s); { 561 wid := 1 562 r := rune(s[i]) 563 if r >= utf8.RuneSelf { 564 r, wid = utf8.DecodeRune(s[i:]) 565 } 566 r = mapping(r) 567 if r >= 0 { 568 rl := utf8.RuneLen(r) 569 if rl < 0 { 570 rl = len(string(utf8.RuneError)) 571 } 572 if nbytes+rl > maxbytes { 573 // Grow the buffer. 574 maxbytes = maxbytes*2 + utf8.UTFMax 575 nb := make([]byte, maxbytes) 576 copy(nb, b[0:nbytes]) 577 b = nb 578 } 579 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) 580 } 581 i += wid 582 } 583 return b[0:nbytes] 584 } 585 586 // Repeat returns a new byte slice consisting of count copies of b. 
587 // 588 // It panics if count is negative or if 589 // the result of (len(b) * count) overflows. 590 func Repeat(b []byte, count int) []byte { 591 if count == 0 { 592 return []byte{} 593 } 594 // Since we cannot return an error on overflow, 595 // we should panic if the repeat will generate 596 // an overflow. 597 // See Issue golang.org/issue/16237. 598 if count < 0 { 599 panic("bytes: negative Repeat count") 600 } else if len(b)*count/count != len(b) { 601 panic("bytes: Repeat count causes overflow") 602 } 603 604 nb := make([]byte, len(b)*count) 605 bp := copy(nb, b) 606 for bp < len(nb) { 607 copy(nb[bp:], nb[:bp]) 608 bp *= 2 609 } 610 return nb 611 } 612 613 // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to 614 // their upper case. 615 func ToUpper(s []byte) []byte { 616 isASCII, hasLower := true, false 617 for i := 0; i < len(s); i++ { 618 c := s[i] 619 if c >= utf8.RuneSelf { 620 isASCII = false 621 break 622 } 623 hasLower = hasLower || ('a' <= c && c <= 'z') 624 } 625 626 if isASCII { // optimize for ASCII-only byte slices. 627 if !hasLower { 628 // Just return a copy. 629 return append([]byte(""), s...) 630 } 631 b := make([]byte, len(s)) 632 for i := 0; i < len(s); i++ { 633 c := s[i] 634 if 'a' <= c && c <= 'z' { 635 c -= 'a' - 'A' 636 } 637 b[i] = c 638 } 639 return b 640 } 641 return Map(unicode.ToUpper, s) 642 } 643 644 // ToLower returns a copy of the byte slice s with all Unicode letters mapped to 645 // their lower case. 646 func ToLower(s []byte) []byte { 647 isASCII, hasUpper := true, false 648 for i := 0; i < len(s); i++ { 649 c := s[i] 650 if c >= utf8.RuneSelf { 651 isASCII = false 652 break 653 } 654 hasUpper = hasUpper || ('A' <= c && c <= 'Z') 655 } 656 657 if isASCII { // optimize for ASCII-only byte slices. 658 if !hasUpper { 659 return append([]byte(""), s...) 
660 } 661 b := make([]byte, len(s)) 662 for i := 0; i < len(s); i++ { 663 c := s[i] 664 if 'A' <= c && c <= 'Z' { 665 c += 'a' - 'A' 666 } 667 b[i] = c 668 } 669 return b 670 } 671 return Map(unicode.ToLower, s) 672 } 673 674 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. 675 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } 676 677 // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 678 // upper case, giving priority to the special casing rules. 679 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { 680 return Map(c.ToUpper, s) 681 } 682 683 // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 684 // lower case, giving priority to the special casing rules. 685 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { 686 return Map(c.ToLower, s) 687 } 688 689 // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 690 // title case, giving priority to the special casing rules. 691 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { 692 return Map(c.ToTitle, s) 693 } 694 695 // ToValidUTF8 treats s as UTF-8-encoded bytes and returns a copy with each run of bytes 696 // representing invalid UTF-8 replaced with the bytes in replacement, which may be empty. 697 func ToValidUTF8(s, replacement []byte) []byte { 698 b := make([]byte, 0, len(s)+len(replacement)) 699 invalid := false // previous byte was from an invalid UTF-8 sequence 700 for i := 0; i < len(s); { 701 c := s[i] 702 if c < utf8.RuneSelf { 703 i++ 704 invalid = false 705 b = append(b, byte(c)) 706 continue 707 } 708 _, wid := utf8.DecodeRune(s[i:]) 709 if wid == 1 { 710 i++ 711 if !invalid { 712 invalid = true 713 b = append(b, replacement...) 714 } 715 continue 716 } 717 invalid = false 718 b = append(b, s[i:i+wid]...) 
719 i += wid 720 } 721 return b 722 } 723 724 // isSeparator reports whether the rune could mark a word boundary. 725 // TODO: update when package unicode captures more of the properties. 726 func isSeparator(r rune) bool { 727 // ASCII alphanumerics and underscore are not separators 728 if r <= 0x7F { 729 switch { 730 case '0' <= r && r <= '9': 731 return false 732 case 'a' <= r && r <= 'z': 733 return false 734 case 'A' <= r && r <= 'Z': 735 return false 736 case r == '_': 737 return false 738 } 739 return true 740 } 741 // Letters and digits are not separators 742 if unicode.IsLetter(r) || unicode.IsDigit(r) { 743 return false 744 } 745 // Otherwise, all we can do for now is treat spaces as separators. 746 return unicode.IsSpace(r) 747 } 748 749 // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin 750 // words mapped to their title case. 751 // 752 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 753 func Title(s []byte) []byte { 754 // Use a closure here to remember state. 755 // Hackish but effective. Depends on Map scanning in order and calling 756 // the closure once per rune. 757 prev := ' ' 758 return Map( 759 func(r rune) rune { 760 if isSeparator(prev) { 761 prev = r 762 return unicode.ToTitle(r) 763 } 764 prev = r 765 return r 766 }, 767 s) 768 } 769 770 // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off 771 // all leading UTF-8-encoded code points c that satisfy f(c). 772 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { 773 i := indexFunc(s, f, false) 774 if i == -1 { 775 return nil 776 } 777 return s[i:] 778 } 779 780 // TrimRightFunc returns a subslice of s by slicing off all trailing 781 // UTF-8-encoded code points c that satisfy f(c). 
782 func TrimRightFunc(s []byte, f func(r rune) bool) []byte { 783 i := lastIndexFunc(s, f, false) 784 if i >= 0 && s[i] >= utf8.RuneSelf { 785 _, wid := utf8.DecodeRune(s[i:]) 786 i += wid 787 } else { 788 i++ 789 } 790 return s[0:i] 791 } 792 793 // TrimFunc returns a subslice of s by slicing off all leading and trailing 794 // UTF-8-encoded code points c that satisfy f(c). 795 func TrimFunc(s []byte, f func(r rune) bool) []byte { 796 return TrimRightFunc(TrimLeftFunc(s, f), f) 797 } 798 799 // TrimPrefix returns s without the provided leading prefix string. 800 // If s doesn't start with prefix, s is returned unchanged. 801 func TrimPrefix(s, prefix []byte) []byte { 802 if HasPrefix(s, prefix) { 803 return s[len(prefix):] 804 } 805 return s 806 } 807 808 // TrimSuffix returns s without the provided trailing suffix string. 809 // If s doesn't end with suffix, s is returned unchanged. 810 func TrimSuffix(s, suffix []byte) []byte { 811 if HasSuffix(s, suffix) { 812 return s[:len(s)-len(suffix)] 813 } 814 return s 815 } 816 817 // IndexFunc interprets s as a sequence of UTF-8-encoded code points. 818 // It returns the byte index in s of the first Unicode 819 // code point satisfying f(c), or -1 if none do. 820 func IndexFunc(s []byte, f func(r rune) bool) int { 821 return indexFunc(s, f, true) 822 } 823 824 // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. 825 // It returns the byte index in s of the last Unicode 826 // code point satisfying f(c), or -1 if none do. 827 func LastIndexFunc(s []byte, f func(r rune) bool) int { 828 return lastIndexFunc(s, f, true) 829 } 830 831 // indexFunc is the same as IndexFunc except that if 832 // truth==false, the sense of the predicate function is 833 // inverted. 
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
	start := 0
	for start < len(s) {
		// Bytes below utf8.RuneSelf are single-byte runes; anything else is
		// decoded to obtain the rune and its width.
		wid := 1
		r := rune(s[start])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[start:])
		}
		if f(r) == truth {
			return start
		}
		start += wid
	}
	return -1
}

// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
	// i is the end (exclusive) of the part of s not yet examined.
	for i := len(s); i > 0; {
		r, size := rune(s[i-1]), 1
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeLastRune(s[0:i])
		}
		// Step back to the start of the rune before testing, so that the
		// returned index is the rune's first byte.
		i -= size
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The 128-bits of the lower 16 bytes,
// starting with the least-significant bit of the lowest word to the
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		// c>>5 selects the 32-bit word, c&31 the bit inside that word.
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// contains reports whether c is inside the set.
889 func (as *asciiSet) contains(c byte) bool { 890 return (as[c>>5] & (1 << uint(c&31))) != 0 891 } 892 893 func makeCutsetFunc(cutset string) func(r rune) bool { 894 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 895 return func(r rune) bool { 896 return r == rune(cutset[0]) 897 } 898 } 899 if as, isASCII := makeASCIISet(cutset); isASCII { 900 return func(r rune) bool { 901 return r < utf8.RuneSelf && as.contains(byte(r)) 902 } 903 } 904 return func(r rune) bool { 905 for _, c := range cutset { 906 if c == r { 907 return true 908 } 909 } 910 return false 911 } 912 } 913 914 // Trim returns a subslice of s by slicing off all leading and 915 // trailing UTF-8-encoded code points contained in cutset. 916 func Trim(s []byte, cutset string) []byte { 917 return TrimFunc(s, makeCutsetFunc(cutset)) 918 } 919 920 // TrimLeft returns a subslice of s by slicing off all leading 921 // UTF-8-encoded code points contained in cutset. 922 func TrimLeft(s []byte, cutset string) []byte { 923 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 924 } 925 926 // TrimRight returns a subslice of s by slicing off all trailing 927 // UTF-8-encoded code points that are contained in cutset. 928 func TrimRight(s []byte, cutset string) []byte { 929 return TrimRightFunc(s, makeCutsetFunc(cutset)) 930 } 931 932 // TrimSpace returns a subslice of s by slicing off all leading and 933 // trailing white space, as defined by Unicode. 
934 func TrimSpace(s []byte) []byte { 935 // Fast path for ASCII: look for the first ASCII non-space byte 936 start := 0 937 for ; start < len(s); start++ { 938 c := s[start] 939 if c >= utf8.RuneSelf { 940 // If we run into a non-ASCII byte, fall back to the 941 // slower unicode-aware method on the remaining bytes 942 return TrimFunc(s[start:], unicode.IsSpace) 943 } 944 if asciiSpace[c] == 0 { 945 break 946 } 947 } 948 949 // Now look for the first ASCII non-space byte from the end 950 stop := len(s) 951 for ; stop > start; stop-- { 952 c := s[stop-1] 953 if c >= utf8.RuneSelf { 954 return TrimFunc(s[start:stop], unicode.IsSpace) 955 } 956 if asciiSpace[c] == 0 { 957 break 958 } 959 } 960 961 // At this point s[start:stop] starts and ends with an ASCII 962 // non-space bytes, so we're done. Non-ASCII cases have already 963 // been handled above. 964 if start == stop { 965 // Special case to preserve previous TrimLeftFunc behavior, 966 // returning nil instead of empty slice if all spaces. 967 return nil 968 } 969 return s[start:stop] 970 } 971 972 // Runes interprets s as a sequence of UTF-8-encoded code points. 973 // It returns a slice of runes (Unicode code points) equivalent to s. 974 func Runes(s []byte) []rune { 975 t := make([]rune, utf8.RuneCount(s)) 976 i := 0 977 for len(s) > 0 { 978 r, l := utf8.DecodeRune(s) 979 t[i] = r 980 i++ 981 s = s[l:] 982 } 983 return t 984 } 985 986 // Replace returns a copy of the slice s with the first n 987 // non-overlapping instances of old replaced by new. 988 // If old is empty, it matches at the beginning of the slice 989 // and after each UTF-8 sequence, yielding up to k+1 replacements 990 // for a k-rune slice. 991 // If n < 0, there is no limit on the number of replacements. 992 func Replace(s, old, new_ []byte, n int) []byte { 993 m := 0 994 if n != 0 { 995 // Compute number of replacements. 996 m = Count(s, old) 997 } 998 if m == 0 { 999 // Just return a copy. 1000 return append([]byte(nil), s...) 
1001 } 1002 if n < 0 || m < n { 1003 n = m 1004 } 1005 1006 // Apply replacements to buffer. 1007 t := make([]byte, len(s)+n*(len(new_)-len(old))) 1008 w := 0 1009 start := 0 1010 for i := 0; i < n; i++ { 1011 j := start 1012 if len(old) == 0 { 1013 if i > 0 { 1014 _, wid := utf8.DecodeRune(s[start:]) 1015 j += wid 1016 } 1017 } else { 1018 j += Index(s[start:], old) 1019 } 1020 w += copy(t[w:], s[start:j]) 1021 w += copy(t[w:], new_) 1022 start = j + len(old) 1023 } 1024 w += copy(t[w:], s[start:]) 1025 return t[0:w] 1026 } 1027 1028 // ReplaceAll returns a copy of the slice s with all 1029 // non-overlapping instances of old replaced by new. 1030 // If old is empty, it matches at the beginning of the slice 1031 // and after each UTF-8 sequence, yielding up to k+1 replacements 1032 // for a k-rune slice. 1033 func ReplaceAll(s, old, new_ []byte) []byte { 1034 return Replace(s, old, new_, -1) 1035 } 1036 1037 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 1038 // are equal under Unicode case-folding, which is a more general 1039 // form of case-insensitivity. 1040 func EqualFold(s, t []byte) bool { 1041 for len(s) != 0 && len(t) != 0 { 1042 // Extract first rune from each. 1043 var sr, tr rune 1044 if s[0] < utf8.RuneSelf { 1045 sr, s = rune(s[0]), s[1:] 1046 } else { 1047 r, size := utf8.DecodeRune(s) 1048 sr, s = r, s[size:] 1049 } 1050 if t[0] < utf8.RuneSelf { 1051 tr, t = rune(t[0]), t[1:] 1052 } else { 1053 r, size := utf8.DecodeRune(t) 1054 tr, t = r, t[size:] 1055 } 1056 1057 // If they match, keep going; if not, return false. 1058 1059 // Easy case. 1060 if tr == sr { 1061 continue 1062 } 1063 1064 // Make sr < tr to simplify what follows. 1065 if tr < sr { 1066 tr, sr = sr, tr 1067 } 1068 // Fast check for ASCII. 1069 if tr < utf8.RuneSelf { 1070 // ASCII only, sr/tr must be upper/lower case 1071 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 1072 continue 1073 } 1074 return false 1075 } 1076 1077 // General case. 
SimpleFold(x) returns the next equivalent rune > x 1078 // or wraps around to smaller values. 1079 r := unicode.SimpleFold(sr) 1080 for r != sr && r < tr { 1081 r = unicode.SimpleFold(r) 1082 } 1083 if r == tr { 1084 continue 1085 } 1086 return false 1087 } 1088 1089 // One string is empty. Are both? 1090 return len(s) == len(t) 1091 } 1092 1093 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. 1094 func Index(s, sep []byte) int { 1095 n := len(sep) 1096 switch { 1097 case n == 0: 1098 return 0 1099 case n == 1: 1100 return IndexByte(s, sep[0]) 1101 case n == len(s): 1102 if Equal(sep, s) { 1103 return 0 1104 } 1105 return -1 1106 case n > len(s): 1107 return -1 1108 case n <= bytealg.MaxLen: 1109 // Use brute force when s and sep both are small 1110 if len(s) <= bytealg.MaxBruteForce { 1111 return bytealg.Index(s, sep) 1112 } 1113 c0 := sep[0] 1114 c1 := sep[1] 1115 i := 0 1116 t := len(s) - n + 1 1117 fails := 0 1118 for i < t { 1119 if s[i] != c0 { 1120 // IndexByte is faster than bytealg.Index, so use it as long as 1121 // we're not getting lots of false positives. 1122 o := IndexByte(s[i+1:t], c0) 1123 if o < 0 { 1124 return -1 1125 } 1126 i += o + 1 1127 } 1128 if s[i+1] == c1 && Equal(s[i:i+n], sep) { 1129 return i 1130 } 1131 fails++ 1132 i++ 1133 // Switch to bytealg.Index when IndexByte produces too many false positives. 
1134 if fails > bytealg.Cutover(i) { 1135 r := bytealg.Index(s[i:], sep) 1136 if r >= 0 { 1137 return r + i 1138 } 1139 return -1 1140 } 1141 } 1142 return -1 1143 } 1144 c0 := sep[0] 1145 c1 := sep[1] 1146 i := 0 1147 fails := 0 1148 t := len(s) - n + 1 1149 for i < t { 1150 if s[i] != c0 { 1151 o := IndexByte(s[i+1:t], c0) 1152 if o < 0 { 1153 break 1154 } 1155 i += o + 1 1156 } 1157 if s[i+1] == c1 && Equal(s[i:i+n], sep) { 1158 return i 1159 } 1160 i++ 1161 fails++ 1162 if fails >= 4+i>>4 && i < t { 1163 // Give up on IndexByte, it isn't skipping ahead 1164 // far enough to be better than Rabin-Karp. 1165 // Experiments (using IndexPeriodic) suggest 1166 // the cutover is about 16 byte skips. 1167 // TODO: if large prefixes of sep are matching 1168 // we should cutover at even larger average skips, 1169 // because Equal becomes that much more expensive. 1170 // This code does not take that effect into account. 1171 j := bytealg.IndexRabinKarpBytes(s[i:], sep) 1172 if j < 0 { 1173 return -1 1174 } 1175 return i + j 1176 } 1177 } 1178 return -1 1179 }