github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/bytes/bytes.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bytes implements functions for the manipulation of byte slices. 6 // It is analogous to the facilities of the strings package. 7 package bytes 8 9 import ( 10 "internal/bytealg" 11 "unicode" 12 "unicode/utf8" 13 ) 14 15 // Equal returns a boolean reporting whether a and b 16 // are the same length and contain the same bytes. 17 // A nil argument is equivalent to an empty slice. 18 func Equal(a, b []byte) bool { 19 return bytealg.Equal(a, b) 20 } 21 22 func equalPortable(a, b []byte) bool { 23 if len(a) != len(b) { 24 return false 25 } 26 for i, c := range a { 27 if c != b[i] { 28 return false 29 } 30 } 31 return true 32 } 33 34 // Compare returns an integer comparing two byte slices lexicographically. 35 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 36 // A nil argument is equivalent to an empty slice. 37 func Compare(a, b []byte) int { 38 return bytealg.Compare(a, b) 39 } 40 41 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes), 42 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes. 43 func explode(s []byte, n int) [][]byte { 44 if n <= 0 { 45 n = len(s) 46 } 47 a := make([][]byte, n) 48 var size int 49 na := 0 50 for len(s) > 0 { 51 if na+1 >= n { 52 a[na] = s 53 na++ 54 break 55 } 56 _, size = utf8.DecodeRune(s) 57 a[na] = s[0:size:size] 58 s = s[size:] 59 na++ 60 } 61 return a[0:na] 62 } 63 64 // Count counts the number of non-overlapping instances of sep in s. 65 // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. 66 func Count(s, sep []byte) int { 67 // special case 68 if len(sep) == 0 { 69 return utf8.RuneCount(s) + 1 70 } 71 if len(sep) == 1 { 72 return bytealg.Count(s, sep[0]) 73 } 74 n := 0 75 for { 76 i := Index(s, sep) 77 if i == -1 { 78 return n 79 } 80 n++ 81 s = s[i+len(sep):] 82 } 83 } 84 85 // Contains reports whether subslice is within b. 86 func Contains(b, subslice []byte) bool { 87 return Index(b, subslice) != -1 88 } 89 90 // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. 91 func ContainsAny(b []byte, chars string) bool { 92 return IndexAny(b, chars) >= 0 93 } 94 95 // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. 96 func ContainsRune(b []byte, r rune) bool { 97 return IndexRune(b, r) >= 0 98 } 99 100 // IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b. 101 func IndexByte(b []byte, c byte) int { 102 return bytealg.IndexByte(b, c) 103 } 104 105 func indexBytePortable(s []byte, c byte) int { 106 for i, b := range s { 107 if b == c { 108 return i 109 } 110 } 111 return -1 112 } 113 114 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. 115 func LastIndex(s, sep []byte) int { 116 n := len(sep) 117 if n == 0 { 118 return len(s) 119 } 120 c := sep[0] 121 for i := len(s) - n; i >= 0; i-- { 122 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { 123 return i 124 } 125 } 126 return -1 127 } 128 129 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 130 func LastIndexByte(s []byte, c byte) int { 131 for i := len(s) - 1; i >= 0; i-- { 132 if s[i] == c { 133 return i 134 } 135 } 136 return -1 137 } 138 139 // IndexRune interprets s as a sequence of UTF-8-encoded code points. 140 // It returns the byte index of the first occurrence in s of the given rune. 141 // It returns -1 if rune is not present in s. 142 // If r is utf8.RuneError, it returns the first instance of any 143 // invalid UTF-8 byte sequence. 144 func IndexRune(s []byte, r rune) int { 145 switch { 146 case 0 <= r && r < utf8.RuneSelf: 147 return IndexByte(s, byte(r)) 148 case r == utf8.RuneError: 149 for i := 0; i < len(s); { 150 r1, n := utf8.DecodeRune(s[i:]) 151 if r1 == utf8.RuneError { 152 return i 153 } 154 i += n 155 } 156 return -1 157 case !utf8.ValidRune(r): 158 return -1 159 default: 160 var b [utf8.UTFMax]byte 161 n := utf8.EncodeRune(b[:], r) 162 return Index(s, b[:n]) 163 } 164 } 165 166 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. 167 // It returns the byte index of the first occurrence in s of any of the Unicode 168 // code points in chars. It returns -1 if chars is empty or if there is no code 169 // point in common. 170 func IndexAny(s []byte, chars string) int { 171 if chars == "" { 172 // Avoid scanning all of s. 173 return -1 174 } 175 if len(s) > 8 { 176 if as, isASCII := makeASCIISet(chars); isASCII { 177 for i, c := range s { 178 if as.contains(c) { 179 return i 180 } 181 } 182 return -1 183 } 184 } 185 var width int 186 for i := 0; i < len(s); i += width { 187 r := rune(s[i]) 188 if r < utf8.RuneSelf { 189 width = 1 190 } else { 191 r, width = utf8.DecodeRune(s[i:]) 192 } 193 for _, ch := range chars { 194 if r == ch { 195 return i 196 } 197 } 198 } 199 return -1 200 } 201 202 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code 203 // points. It returns the byte index of the last occurrence in s of any of 204 // the Unicode code points in chars. It returns -1 if chars is empty or if 205 // there is no code point in common. 206 func LastIndexAny(s []byte, chars string) int { 207 if chars == "" { 208 // Avoid scanning all of s. 209 return -1 210 } 211 if len(s) > 8 { 212 if as, isASCII := makeASCIISet(chars); isASCII { 213 for i := len(s) - 1; i >= 0; i-- { 214 if as.contains(s[i]) { 215 return i 216 } 217 } 218 return -1 219 } 220 } 221 for i := len(s); i > 0; { 222 r, size := utf8.DecodeLastRune(s[:i]) 223 i -= size 224 for _, c := range chars { 225 if r == c { 226 return i 227 } 228 } 229 } 230 return -1 231 } 232 233 // Generic split: splits after each instance of sep, 234 // including sepSave bytes of sep in the subslices. 235 func genSplit(s, sep []byte, sepSave, n int) [][]byte { 236 if n == 0 { 237 return nil 238 } 239 if len(sep) == 0 { 240 return explode(s, n) 241 } 242 if n < 0 { 243 n = Count(s, sep) + 1 244 } 245 246 a := make([][]byte, n) 247 n-- 248 i := 0 249 for i < n { 250 m := Index(s, sep) 251 if m < 0 { 252 break 253 } 254 a[i] = s[: m+sepSave : m+sepSave] 255 s = s[m+len(sep):] 256 i++ 257 } 258 a[i] = s 259 return a[:i+1] 260 } 261 262 // SplitN slices s into subslices separated by sep and returns a slice of 263 // the subslices between those separators. 264 // If sep is empty, SplitN splits after each UTF-8 sequence. 265 // The count determines the number of subslices to return: 266 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 267 // n == 0: the result is nil (zero subslices) 268 // n < 0: all subslices 269 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } 270 271 // SplitAfterN slices s into subslices after each instance of sep and 272 // returns a slice of those subslices. 273 // If sep is empty, SplitAfterN splits after each UTF-8 sequence. 274 // The count determines the number of subslices to return: 275 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 276 // n == 0: the result is nil (zero subslices) 277 // n < 0: all subslices 278 func SplitAfterN(s, sep []byte, n int) [][]byte { 279 return genSplit(s, sep, len(sep), n) 280 } 281 282 // Split slices s into all subslices separated by sep and returns a slice of 283 // the subslices between those separators. 284 // If sep is empty, Split splits after each UTF-8 sequence. 285 // It is equivalent to SplitN with a count of -1. 286 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } 287 288 // SplitAfter slices s into all subslices after each instance of sep and 289 // returns a slice of those subslices. 290 // If sep is empty, SplitAfter splits after each UTF-8 sequence. 291 // It is equivalent to SplitAfterN with a count of -1. 292 func SplitAfter(s, sep []byte) [][]byte { 293 return genSplit(s, sep, len(sep), -1) 294 } 295 296 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 297 298 // Fields interprets s as a sequence of UTF-8-encoded code points. 299 // It splits the slice s around each instance of one or more consecutive white space 300 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an 301 // empty slice if s contains only white space. 302 func Fields(s []byte) [][]byte { 303 // First count the fields. 304 // This is an exact count if s is ASCII, otherwise it is an approximation. 305 n := 0 306 wasSpace := 1 307 // setBits is used to track which bits are set in the bytes of s. 308 setBits := uint8(0) 309 for i := 0; i < len(s); i++ { 310 r := s[i] 311 setBits |= r 312 isSpace := int(asciiSpace[r]) 313 n += wasSpace & ^isSpace 314 wasSpace = isSpace 315 } 316 317 if setBits >= utf8.RuneSelf { 318 // Some runes in the input slice are not ASCII. 319 return FieldsFunc(s, unicode.IsSpace) 320 } 321 322 // ASCII fast path 323 a := make([][]byte, n) 324 na := 0 325 fieldStart := 0 326 i := 0 327 // Skip spaces in the front of the input. 328 for i < len(s) && asciiSpace[s[i]] != 0 { 329 i++ 330 } 331 fieldStart = i 332 for i < len(s) { 333 if asciiSpace[s[i]] == 0 { 334 i++ 335 continue 336 } 337 a[na] = s[fieldStart:i:i] 338 na++ 339 i++ 340 // Skip spaces in between fields. 341 for i < len(s) && asciiSpace[s[i]] != 0 { 342 i++ 343 } 344 fieldStart = i 345 } 346 if fieldStart < len(s) { // Last field might end at EOF. 347 a[na] = s[fieldStart:len(s):len(s)] 348 } 349 return a 350 } 351 352 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points. 353 // It splits the slice s at each run of code points c satisfying f(c) and 354 // returns a slice of subslices of s. If all code points in s satisfy f(c), or 355 // len(s) == 0, an empty slice is returned. 356 // FieldsFunc makes no guarantees about the order in which it calls f(c). 357 // If f does not return consistent results for a given c, FieldsFunc may crash. 358 func FieldsFunc(s []byte, f func(rune) bool) [][]byte { 359 // A span is used to record a slice of s of the form s[start:end]. 360 // The start index is inclusive and the end index is exclusive. 361 type span struct { 362 start int 363 end int 364 } 365 spans := make([]span, 0, 32) 366 367 // Find the field start and end indices. 368 wasField := false 369 fromIndex := 0 370 for i := 0; i < len(s); { 371 size := 1 372 r := rune(s[i]) 373 if r >= utf8.RuneSelf { 374 r, size = utf8.DecodeRune(s[i:]) 375 } 376 if f(r) { 377 if wasField { 378 spans = append(spans, span{start: fromIndex, end: i}) 379 wasField = false 380 } 381 } else { 382 if !wasField { 383 fromIndex = i 384 wasField = true 385 } 386 } 387 i += size 388 } 389 390 // Last field might end at EOF. 391 if wasField { 392 spans = append(spans, span{fromIndex, len(s)}) 393 } 394 395 // Create subslices from recorded field indices. 396 a := make([][]byte, len(spans)) 397 for i, span := range spans { 398 a[i] = s[span.start:span.end:span.end] 399 } 400 401 return a 402 } 403 404 // Join concatenates the elements of s to create a new byte slice. The separator 405 // sep is placed between elements in the resulting slice. 406 func Join(s [][]byte, sep []byte) []byte { 407 if len(s) == 0 { 408 return []byte{} 409 } 410 if len(s) == 1 { 411 // Just return a copy. 412 return append([]byte(nil), s[0]...) 413 } 414 n := len(sep) * (len(s) - 1) 415 for _, v := range s { 416 n += len(v) 417 } 418 419 b := make([]byte, n) 420 bp := copy(b, s[0]) 421 for _, v := range s[1:] { 422 bp += copy(b[bp:], sep) 423 bp += copy(b[bp:], v) 424 } 425 return b 426 } 427 428 // HasPrefix tests whether the byte slice s begins with prefix. 429 func HasPrefix(s, prefix []byte) bool { 430 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) 431 } 432 433 // HasSuffix tests whether the byte slice s ends with suffix. 434 func HasSuffix(s, suffix []byte) bool { 435 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) 436 } 437 438 // Map returns a copy of the byte slice s with all its characters modified 439 // according to the mapping function. If mapping returns a negative value, the character is 440 // dropped from the byte slice with no replacement. The characters in s and the 441 // output are interpreted as UTF-8-encoded code points. 442 func Map(mapping func(r rune) rune, s []byte) []byte { 443 // In the worst case, the slice can grow when mapped, making 444 // things unpleasant. But it's so rare we barge in assuming it's 445 // fine. It could also shrink but that falls out naturally. 446 maxbytes := len(s) // length of b 447 nbytes := 0 // number of bytes encoded in b 448 b := make([]byte, maxbytes) 449 for i := 0; i < len(s); { 450 wid := 1 451 r := rune(s[i]) 452 if r >= utf8.RuneSelf { 453 r, wid = utf8.DecodeRune(s[i:]) 454 } 455 r = mapping(r) 456 if r >= 0 { 457 rl := utf8.RuneLen(r) 458 if rl < 0 { 459 rl = len(string(utf8.RuneError)) 460 } 461 if nbytes+rl > maxbytes { 462 // Grow the buffer. 463 maxbytes = maxbytes*2 + utf8.UTFMax 464 nb := make([]byte, maxbytes) 465 copy(nb, b[0:nbytes]) 466 b = nb 467 } 468 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) 469 } 470 i += wid 471 } 472 return b[0:nbytes] 473 } 474 475 // Repeat returns a new byte slice consisting of count copies of b. 476 // 477 // It panics if count is negative or if 478 // the result of (len(b) * count) overflows. 479 func Repeat(b []byte, count int) []byte { 480 // Since we cannot return an error on overflow, 481 // we should panic if the repeat will generate 482 // an overflow. 483 // See Issue golang.org/issue/16237. 484 if count < 0 { 485 panic("bytes: negative Repeat count") 486 } else if count > 0 && len(b)*count/count != len(b) { 487 panic("bytes: Repeat count causes overflow") 488 } 489 490 nb := make([]byte, len(b)*count) 491 bp := copy(nb, b) 492 for bp < len(nb) { 493 copy(nb[bp:], nb[:bp]) 494 bp *= 2 495 } 496 return nb 497 } 498 499 // ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case. 500 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } 501 502 // ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case. 503 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } 504 505 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. 506 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } 507 508 // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 509 // upper case, giving priority to the special casing rules. 510 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { 511 return Map(c.ToUpper, s) 512 } 513 514 // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 515 // lower case, giving priority to the special casing rules. 516 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { 517 return Map(c.ToLower, s) 518 } 519 520 // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 521 // title case, giving priority to the special casing rules. 522 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { 523 return Map(c.ToTitle, s) 524 } 525 526 // isSeparator reports whether the rune could mark a word boundary. 527 // TODO: update when package unicode captures more of the properties. 528 func isSeparator(r rune) bool { 529 // ASCII alphanumerics and underscore are not separators 530 if r <= 0x7F { 531 switch { 532 case '0' <= r && r <= '9': 533 return false 534 case 'a' <= r && r <= 'z': 535 return false 536 case 'A' <= r && r <= 'Z': 537 return false 538 case r == '_': 539 return false 540 } 541 return true 542 } 543 // Letters and digits are not separators 544 if unicode.IsLetter(r) || unicode.IsDigit(r) { 545 return false 546 } 547 // Otherwise, all we can do for now is treat spaces as separators. 548 return unicode.IsSpace(r) 549 } 550 551 // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin 552 // words mapped to their title case. 553 // 554 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 555 func Title(s []byte) []byte { 556 // Use a closure here to remember state. 557 // Hackish but effective. Depends on Map scanning in order and calling 558 // the closure once per rune. 559 prev := ' ' 560 return Map( 561 func(r rune) rune { 562 if isSeparator(prev) { 563 prev = r 564 return unicode.ToTitle(r) 565 } 566 prev = r 567 return r 568 }, 569 s) 570 } 571 572 // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off 573 // all leading UTF-8-encoded code points c that satisfy f(c). 574 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { 575 i := indexFunc(s, f, false) 576 if i == -1 { 577 return nil 578 } 579 return s[i:] 580 } 581 582 // TrimRightFunc returns a subslice of s by slicing off all trailing 583 // UTF-8-encoded code points c that satisfy f(c). 584 func TrimRightFunc(s []byte, f func(r rune) bool) []byte { 585 i := lastIndexFunc(s, f, false) 586 if i >= 0 && s[i] >= utf8.RuneSelf { 587 _, wid := utf8.DecodeRune(s[i:]) 588 i += wid 589 } else { 590 i++ 591 } 592 return s[0:i] 593 } 594 595 // TrimFunc returns a subslice of s by slicing off all leading and trailing 596 // UTF-8-encoded code points c that satisfy f(c). 597 func TrimFunc(s []byte, f func(r rune) bool) []byte { 598 return TrimRightFunc(TrimLeftFunc(s, f), f) 599 } 600 601 // TrimPrefix returns s without the provided leading prefix string. 602 // If s doesn't start with prefix, s is returned unchanged. 603 func TrimPrefix(s, prefix []byte) []byte { 604 if HasPrefix(s, prefix) { 605 return s[len(prefix):] 606 } 607 return s 608 } 609 610 // TrimSuffix returns s without the provided trailing suffix string. 611 // If s doesn't end with suffix, s is returned unchanged. 612 func TrimSuffix(s, suffix []byte) []byte { 613 if HasSuffix(s, suffix) { 614 return s[:len(s)-len(suffix)] 615 } 616 return s 617 } 618 619 // IndexFunc interprets s as a sequence of UTF-8-encoded code points. 620 // It returns the byte index in s of the first Unicode 621 // code point satisfying f(c), or -1 if none do. 622 func IndexFunc(s []byte, f func(r rune) bool) int { 623 return indexFunc(s, f, true) 624 } 625 626 // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. 627 // It returns the byte index in s of the last Unicode 628 // code point satisfying f(c), or -1 if none do. 629 func LastIndexFunc(s []byte, f func(r rune) bool) int { 630 return lastIndexFunc(s, f, true) 631 } 632 633 // indexFunc is the same as IndexFunc except that if 634 // truth==false, the sense of the predicate function is 635 // inverted. 636 func indexFunc(s []byte, f func(r rune) bool, truth bool) int { 637 start := 0 638 for start < len(s) { 639 wid := 1 640 r := rune(s[start]) 641 if r >= utf8.RuneSelf { 642 r, wid = utf8.DecodeRune(s[start:]) 643 } 644 if f(r) == truth { 645 return start 646 } 647 start += wid 648 } 649 return -1 650 } 651 652 // lastIndexFunc is the same as LastIndexFunc except that if 653 // truth==false, the sense of the predicate function is 654 // inverted. 655 func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int { 656 for i := len(s); i > 0; { 657 r, size := rune(s[i-1]), 1 658 if r >= utf8.RuneSelf { 659 r, size = utf8.DecodeLastRune(s[0:i]) 660 } 661 i -= size 662 if f(r) == truth { 663 return i 664 } 665 } 666 return -1 667 } 668 669 // asciiSet is a 32-byte value, where each bit represents the presence of a 670 // given ASCII character in the set. The 128-bits of the lower 16 bytes, 671 // starting with the least-significant bit of the lowest word to the 672 // most-significant bit of the highest word, map to the full range of all 673 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, 674 // ensuring that any non-ASCII character will be reported as not in the set. 675 type asciiSet [8]uint32 676 677 // makeASCIISet creates a set of ASCII characters and reports whether all 678 // characters in chars are ASCII. 679 func makeASCIISet(chars string) (as asciiSet, ok bool) { 680 for i := 0; i < len(chars); i++ { 681 c := chars[i] 682 if c >= utf8.RuneSelf { 683 return as, false 684 } 685 as[c>>5] |= 1 << uint(c&31) 686 } 687 return as, true 688 } 689 690 // contains reports whether c is inside the set. 691 func (as *asciiSet) contains(c byte) bool { 692 return (as[c>>5] & (1 << uint(c&31))) != 0 693 } 694 695 func makeCutsetFunc(cutset string) func(r rune) bool { 696 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 697 return func(r rune) bool { 698 return r == rune(cutset[0]) 699 } 700 } 701 if as, isASCII := makeASCIISet(cutset); isASCII { 702 return func(r rune) bool { 703 return r < utf8.RuneSelf && as.contains(byte(r)) 704 } 705 } 706 return func(r rune) bool { 707 for _, c := range cutset { 708 if c == r { 709 return true 710 } 711 } 712 return false 713 } 714 } 715 716 // Trim returns a subslice of s by slicing off all leading and 717 // trailing UTF-8-encoded code points contained in cutset. 718 func Trim(s []byte, cutset string) []byte { 719 return TrimFunc(s, makeCutsetFunc(cutset)) 720 } 721 722 // TrimLeft returns a subslice of s by slicing off all leading 723 // UTF-8-encoded code points contained in cutset. 724 func TrimLeft(s []byte, cutset string) []byte { 725 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 726 } 727 728 // TrimRight returns a subslice of s by slicing off all trailing 729 // UTF-8-encoded code points that are contained in cutset. 730 func TrimRight(s []byte, cutset string) []byte { 731 return TrimRightFunc(s, makeCutsetFunc(cutset)) 732 } 733 734 // TrimSpace returns a subslice of s by slicing off all leading and 735 // trailing white space, as defined by Unicode. 736 func TrimSpace(s []byte) []byte { 737 return TrimFunc(s, unicode.IsSpace) 738 } 739 740 // Runes interprets s as a sequence of UTF-8-encoded code points. 741 // It returns a slice of runes (Unicode code points) equivalent to s. 742 func Runes(s []byte) []rune { 743 t := make([]rune, utf8.RuneCount(s)) 744 i := 0 745 for len(s) > 0 { 746 r, l := utf8.DecodeRune(s) 747 t[i] = r 748 i++ 749 s = s[l:] 750 } 751 return t 752 } 753 754 // Replace returns a copy of the slice s with the first n 755 // non-overlapping instances of old replaced by new. 756 // If old is empty, it matches at the beginning of the slice 757 // and after each UTF-8 sequence, yielding up to k+1 replacements 758 // for a k-rune slice. 759 // If n < 0, there is no limit on the number of replacements. 760 func Replace(s, old, new []byte, n int) []byte { 761 m := 0 762 if n != 0 { 763 // Compute number of replacements. 764 m = Count(s, old) 765 } 766 if m == 0 { 767 // Just return a copy. 768 return append([]byte(nil), s...) 769 } 770 if n < 0 || m < n { 771 n = m 772 } 773 774 // Apply replacements to buffer. 775 t := make([]byte, len(s)+n*(len(new)-len(old))) 776 w := 0 777 start := 0 778 for i := 0; i < n; i++ { 779 j := start 780 if len(old) == 0 { 781 if i > 0 { 782 _, wid := utf8.DecodeRune(s[start:]) 783 j += wid 784 } 785 } else { 786 j += Index(s[start:], old) 787 } 788 w += copy(t[w:], s[start:j]) 789 w += copy(t[w:], new) 790 start = j + len(old) 791 } 792 w += copy(t[w:], s[start:]) 793 return t[0:w] 794 } 795 796 // ReplaceAll returns a copy of the slice s with all 797 // non-overlapping instances of old replaced by new. 798 // If old is empty, it matches at the beginning of the slice 799 // and after each UTF-8 sequence, yielding up to k+1 replacements 800 // for a k-rune slice. 801 func ReplaceAll(s, old, new []byte) []byte { 802 return Replace(s, old, new, -1) 803 } 804 805 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 806 // are equal under Unicode case-folding. 807 func EqualFold(s, t []byte) bool { 808 for len(s) != 0 && len(t) != 0 { 809 // Extract first rune from each. 810 var sr, tr rune 811 if s[0] < utf8.RuneSelf { 812 sr, s = rune(s[0]), s[1:] 813 } else { 814 r, size := utf8.DecodeRune(s) 815 sr, s = r, s[size:] 816 } 817 if t[0] < utf8.RuneSelf { 818 tr, t = rune(t[0]), t[1:] 819 } else { 820 r, size := utf8.DecodeRune(t) 821 tr, t = r, t[size:] 822 } 823 824 // If they match, keep going; if not, return false. 825 826 // Easy case. 827 if tr == sr { 828 continue 829 } 830 831 // Make sr < tr to simplify what follows. 832 if tr < sr { 833 tr, sr = sr, tr 834 } 835 // Fast check for ASCII. 836 if tr < utf8.RuneSelf { 837 // ASCII only, sr/tr must be upper/lower case 838 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 839 continue 840 } 841 return false 842 } 843 844 // General case. SimpleFold(x) returns the next equivalent rune > x 845 // or wraps around to smaller values. 846 r := unicode.SimpleFold(sr) 847 for r != sr && r < tr { 848 r = unicode.SimpleFold(r) 849 } 850 if r == tr { 851 continue 852 } 853 return false 854 } 855 856 // One string is empty. Are both? 857 return len(s) == len(t) 858 } 859 860 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. 861 func Index(s, sep []byte) int { 862 n := len(sep) 863 switch { 864 case n == 0: 865 return 0 866 case n == 1: 867 return IndexByte(s, sep[0]) 868 case n == len(s): 869 if Equal(sep, s) { 870 return 0 871 } 872 return -1 873 case n > len(s): 874 return -1 875 case n <= bytealg.MaxLen: 876 // Use brute force when s and sep both are small 877 if len(s) <= bytealg.MaxBruteForce { 878 return bytealg.Index(s, sep) 879 } 880 c0 := sep[0] 881 c1 := sep[1] 882 i := 0 883 t := len(s) - n + 1 884 fails := 0 885 for i < t { 886 if s[i] != c0 { 887 // IndexByte is faster than bytealg.Index, so use it as long as 888 // we're not getting lots of false positives. 889 o := IndexByte(s[i:t], c0) 890 if o < 0 { 891 return -1 892 } 893 i += o 894 } 895 if s[i+1] == c1 && Equal(s[i:i+n], sep) { 896 return i 897 } 898 fails++ 899 i++ 900 // Switch to bytealg.Index when IndexByte produces too many false positives. 901 if fails > bytealg.Cutover(i) { 902 r := bytealg.Index(s[i:], sep) 903 if r >= 0 { 904 return r + i 905 } 906 return -1 907 } 908 } 909 return -1 910 } 911 c0 := sep[0] 912 c1 := sep[1] 913 i := 0 914 fails := 0 915 t := len(s) - n + 1 916 for i < t { 917 if s[i] != c0 { 918 o := IndexByte(s[i:t], c0) 919 if o < 0 { 920 break 921 } 922 i += o 923 } 924 if s[i+1] == c1 && Equal(s[i:i+n], sep) { 925 return i 926 } 927 i++ 928 fails++ 929 if fails >= 4+i>>4 && i < t { 930 // Give up on IndexByte, it isn't skipping ahead 931 // far enough to be better than Rabin-Karp. 932 // Experiments (using IndexPeriodic) suggest 933 // the cutover is about 16 byte skips. 934 // TODO: if large prefixes of sep are matching 935 // we should cutover at even larger average skips, 936 // because Equal becomes that much more expensive. 937 // This code does not take that effect into account. 938 j := indexRabinKarp(s[i:], sep) 939 if j < 0 { 940 return -1 941 } 942 return i + j 943 } 944 } 945 return -1 946 } 947 948 func indexRabinKarp(s, sep []byte) int { 949 // Rabin-Karp search 950 hashsep, pow := hashStr(sep) 951 n := len(sep) 952 var h uint32 953 for i := 0; i < n; i++ { 954 h = h*primeRK + uint32(s[i]) 955 } 956 if h == hashsep && Equal(s[:n], sep) { 957 return 0 958 } 959 for i := n; i < len(s); { 960 h *= primeRK 961 h += uint32(s[i]) 962 h -= pow * uint32(s[i-n]) 963 i++ 964 if h == hashsep && Equal(s[i-n:i], sep) { 965 return i - n 966 } 967 } 968 return -1 969 } 970 971 // primeRK is the prime base used in Rabin-Karp algorithm. 972 const primeRK = 16777619 973 974 // hashStr returns the hash and the appropriate multiplicative 975 // factor for use in Rabin-Karp algorithm. 976 func hashStr(sep []byte) (uint32, uint32) { 977 hash := uint32(0) 978 for i := 0; i < len(sep); i++ { 979 hash = hash*primeRK + uint32(sep[i]) 980 } 981 var pow, sq uint32 = 1, primeRK 982 for i := len(sep); i > 0; i >>= 1 { 983 if i&1 != 0 { 984 pow *= sq 985 } 986 sq *= sq 987 } 988 return hash, pow 989 }