github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/bytes/bytes.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bytes implements functions for the manipulation of byte slices. 6 // It is analogous to the facilities of the strings package. 7 package bytes 8 9 import ( 10 "internal/bytealg" 11 "unicode" 12 "unicode/utf8" 13 ) 14 15 func equalPortable(a, b []byte) bool { 16 if len(a) != len(b) { 17 return false 18 } 19 for i, c := range a { 20 if c != b[i] { 21 return false 22 } 23 } 24 return true 25 } 26 27 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes), 28 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes. 29 func explode(s []byte, n int) [][]byte { 30 if n <= 0 { 31 n = len(s) 32 } 33 a := make([][]byte, n) 34 var size int 35 na := 0 36 for len(s) > 0 { 37 if na+1 >= n { 38 a[na] = s 39 na++ 40 break 41 } 42 _, size = utf8.DecodeRune(s) 43 a[na] = s[0:size:size] 44 s = s[size:] 45 na++ 46 } 47 return a[0:na] 48 } 49 50 // Count counts the number of non-overlapping instances of sep in s. 51 // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. 52 func Count(s, sep []byte) int { 53 // special case 54 if len(sep) == 0 { 55 return utf8.RuneCount(s) + 1 56 } 57 if len(sep) == 1 { 58 return bytealg.Count(s, sep[0]) 59 } 60 n := 0 61 for { 62 i := Index(s, sep) 63 if i == -1 { 64 return n 65 } 66 n++ 67 s = s[i+len(sep):] 68 } 69 } 70 71 // Contains reports whether subslice is within b. 72 func Contains(b, subslice []byte) bool { 73 return Index(b, subslice) != -1 74 } 75 76 // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. 
77 func ContainsAny(b []byte, chars string) bool { 78 return IndexAny(b, chars) >= 0 79 } 80 81 // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. 82 func ContainsRune(b []byte, r rune) bool { 83 return IndexRune(b, r) >= 0 84 } 85 86 func indexBytePortable(s []byte, c byte) int { 87 for i, b := range s { 88 if b == c { 89 return i 90 } 91 } 92 return -1 93 } 94 95 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. 96 func LastIndex(s, sep []byte) int { 97 n := len(sep) 98 if n == 0 { 99 return len(s) 100 } 101 c := sep[0] 102 for i := len(s) - n; i >= 0; i-- { 103 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { 104 return i 105 } 106 } 107 return -1 108 } 109 110 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 111 func LastIndexByte(s []byte, c byte) int { 112 for i := len(s) - 1; i >= 0; i-- { 113 if s[i] == c { 114 return i 115 } 116 } 117 return -1 118 } 119 120 // IndexRune interprets s as a sequence of UTF-8-encoded code points. 121 // It returns the byte index of the first occurrence in s of the given rune. 122 // It returns -1 if rune is not present in s. 123 // If r is utf8.RuneError, it returns the first instance of any 124 // invalid UTF-8 byte sequence. 125 func IndexRune(s []byte, r rune) int { 126 switch { 127 case 0 <= r && r < utf8.RuneSelf: 128 return IndexByte(s, byte(r)) 129 case r == utf8.RuneError: 130 for i := 0; i < len(s); { 131 r1, n := utf8.DecodeRune(s[i:]) 132 if r1 == utf8.RuneError { 133 return i 134 } 135 i += n 136 } 137 return -1 138 case !utf8.ValidRune(r): 139 return -1 140 default: 141 var b [utf8.UTFMax]byte 142 n := utf8.EncodeRune(b[:], r) 143 return Index(s, b[:n]) 144 } 145 } 146 147 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. 148 // It returns the byte index of the first occurrence in s of any of the Unicode 149 // code points in chars. 
It returns -1 if chars is empty or if there is no code 150 // point in common. 151 func IndexAny(s []byte, chars string) int { 152 if chars == "" { 153 // Avoid scanning all of s. 154 return -1 155 } 156 if len(s) > 8 { 157 if as, isASCII := makeASCIISet(chars); isASCII { 158 for i, c := range s { 159 if as.contains(c) { 160 return i 161 } 162 } 163 return -1 164 } 165 } 166 var width int 167 for i := 0; i < len(s); i += width { 168 r := rune(s[i]) 169 if r < utf8.RuneSelf { 170 width = 1 171 } else { 172 r, width = utf8.DecodeRune(s[i:]) 173 } 174 for _, ch := range chars { 175 if r == ch { 176 return i 177 } 178 } 179 } 180 return -1 181 } 182 183 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code 184 // points. It returns the byte index of the last occurrence in s of any of 185 // the Unicode code points in chars. It returns -1 if chars is empty or if 186 // there is no code point in common. 187 func LastIndexAny(s []byte, chars string) int { 188 if chars == "" { 189 // Avoid scanning all of s. 190 return -1 191 } 192 if len(s) > 8 { 193 if as, isASCII := makeASCIISet(chars); isASCII { 194 for i := len(s) - 1; i >= 0; i-- { 195 if as.contains(s[i]) { 196 return i 197 } 198 } 199 return -1 200 } 201 } 202 for i := len(s); i > 0; { 203 r, size := utf8.DecodeLastRune(s[:i]) 204 i -= size 205 for _, c := range chars { 206 if r == c { 207 return i 208 } 209 } 210 } 211 return -1 212 } 213 214 // Generic split: splits after each instance of sep, 215 // including sepSave bytes of sep in the subslices. 
216 func genSplit(s, sep []byte, sepSave, n int) [][]byte { 217 if n == 0 { 218 return nil 219 } 220 if len(sep) == 0 { 221 return explode(s, n) 222 } 223 if n < 0 { 224 n = Count(s, sep) + 1 225 } 226 227 a := make([][]byte, n) 228 n-- 229 i := 0 230 for i < n { 231 m := Index(s, sep) 232 if m < 0 { 233 break 234 } 235 a[i] = s[: m+sepSave : m+sepSave] 236 s = s[m+len(sep):] 237 i++ 238 } 239 a[i] = s 240 return a[:i+1] 241 } 242 243 // SplitN slices s into subslices separated by sep and returns a slice of 244 // the subslices between those separators. 245 // If sep is empty, SplitN splits after each UTF-8 sequence. 246 // The count determines the number of subslices to return: 247 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 248 // n == 0: the result is nil (zero subslices) 249 // n < 0: all subslices 250 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } 251 252 // SplitAfterN slices s into subslices after each instance of sep and 253 // returns a slice of those subslices. 254 // If sep is empty, SplitAfterN splits after each UTF-8 sequence. 255 // The count determines the number of subslices to return: 256 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 257 // n == 0: the result is nil (zero subslices) 258 // n < 0: all subslices 259 func SplitAfterN(s, sep []byte, n int) [][]byte { 260 return genSplit(s, sep, len(sep), n) 261 } 262 263 // Split slices s into all subslices separated by sep and returns a slice of 264 // the subslices between those separators. 265 // If sep is empty, Split splits after each UTF-8 sequence. 266 // It is equivalent to SplitN with a count of -1. 267 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } 268 269 // SplitAfter slices s into all subslices after each instance of sep and 270 // returns a slice of those subslices. 271 // If sep is empty, SplitAfter splits after each UTF-8 sequence. 
272 // It is equivalent to SplitAfterN with a count of -1. 273 func SplitAfter(s, sep []byte) [][]byte { 274 return genSplit(s, sep, len(sep), -1) 275 } 276 277 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 278 279 // Fields interprets s as a sequence of UTF-8-encoded code points. 280 // It splits the slice s around each instance of one or more consecutive white space 281 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an 282 // empty slice if s contains only white space. 283 func Fields(s []byte) [][]byte { 284 // First count the fields. 285 // This is an exact count if s is ASCII, otherwise it is an approximation. 286 n := 0 287 wasSpace := 1 288 // setBits is used to track which bits are set in the bytes of s. 289 setBits := uint8(0) 290 for i := 0; i < len(s); i++ { 291 r := s[i] 292 setBits |= r 293 isSpace := int(asciiSpace[r]) 294 n += wasSpace & ^isSpace 295 wasSpace = isSpace 296 } 297 298 if setBits >= utf8.RuneSelf { 299 // Some runes in the input slice are not ASCII. 300 return FieldsFunc(s, unicode.IsSpace) 301 } 302 303 // ASCII fast path 304 a := make([][]byte, n) 305 na := 0 306 fieldStart := 0 307 i := 0 308 // Skip spaces in the front of the input. 309 for i < len(s) && asciiSpace[s[i]] != 0 { 310 i++ 311 } 312 fieldStart = i 313 for i < len(s) { 314 if asciiSpace[s[i]] == 0 { 315 i++ 316 continue 317 } 318 a[na] = s[fieldStart:i:i] 319 na++ 320 i++ 321 // Skip spaces in between fields. 322 for i < len(s) && asciiSpace[s[i]] != 0 { 323 i++ 324 } 325 fieldStart = i 326 } 327 if fieldStart < len(s) { // Last field might end at EOF. 328 a[na] = s[fieldStart:len(s):len(s)] 329 } 330 return a 331 } 332 333 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points. 334 // It splits the slice s at each run of code points c satisfying f(c) and 335 // returns a slice of subslices of s. If all code points in s satisfy f(c), or 336 // len(s) == 0, an empty slice is returned. 
// FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
	// A span is used to record a slice of s of the form s[start:end].
	// The start index is inclusive and the end index is exclusive.
	type span struct {
		start int
		end   int
	}
	spans := make([]span, 0, 32)

	// Find the field start and end indices.
	wasField := false
	fromIndex := 0
	for i := 0; i < len(s); {
		size := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeRune(s[i:])
		}
		if f(r) {
			// A separator closes the field that was open, if any.
			if wasField {
				spans = append(spans, span{start: fromIndex, end: i})
				wasField = false
			}
		} else {
			// The first non-separator after a separator opens a new field.
			if !wasField {
				fromIndex = i
				wasField = true
			}
		}
		i += size
	}

	// Last field might end at EOF.
	if wasField {
		spans = append(spans, span{fromIndex, len(s)})
	}

	// Create subslices from recorded field indices.
	a := make([][]byte, len(spans))
	for i, span := range spans {
		// Capacity is limited to the field so that an append on one field
		// cannot overwrite the bytes that follow it in s.
		a[i] = s[span.start:span.end:span.end]
	}

	return a
}

// Join concatenates the elements of s to create a new byte slice. The separator
// sep is placed between elements in the resulting slice.
func Join(s [][]byte, sep []byte) []byte {
	if len(s) == 0 {
		return []byte{}
	}
	if len(s) == 1 {
		// Just return a copy.
		return append([]byte(nil), s[0]...)
	}
	// Compute the exact output length up front so the result is allocated once.
	n := len(sep) * (len(s) - 1)
	for _, v := range s {
		n += len(v)
	}

	b := make([]byte, n)
	bp := copy(b, s[0])
	for _, v := range s[1:] {
		bp += copy(b[bp:], sep)
		bp += copy(b[bp:], v)
	}
	return b
}

// HasPrefix tests whether the byte slice s begins with prefix.
410 func HasPrefix(s, prefix []byte) bool { 411 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) 412 } 413 414 // HasSuffix tests whether the byte slice s ends with suffix. 415 func HasSuffix(s, suffix []byte) bool { 416 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) 417 } 418 419 // Map returns a copy of the byte slice s with all its characters modified 420 // according to the mapping function. If mapping returns a negative value, the character is 421 // dropped from the byte slice with no replacement. The characters in s and the 422 // output are interpreted as UTF-8-encoded code points. 423 func Map(mapping func(r rune) rune, s []byte) []byte { 424 // In the worst case, the slice can grow when mapped, making 425 // things unpleasant. But it's so rare we barge in assuming it's 426 // fine. It could also shrink but that falls out naturally. 427 maxbytes := len(s) // length of b 428 nbytes := 0 // number of bytes encoded in b 429 b := make([]byte, maxbytes) 430 for i := 0; i < len(s); { 431 wid := 1 432 r := rune(s[i]) 433 if r >= utf8.RuneSelf { 434 r, wid = utf8.DecodeRune(s[i:]) 435 } 436 r = mapping(r) 437 if r >= 0 { 438 rl := utf8.RuneLen(r) 439 if rl < 0 { 440 rl = len(string(utf8.RuneError)) 441 } 442 if nbytes+rl > maxbytes { 443 // Grow the buffer. 444 maxbytes = maxbytes*2 + utf8.UTFMax 445 nb := make([]byte, maxbytes) 446 copy(nb, b[0:nbytes]) 447 b = nb 448 } 449 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) 450 } 451 i += wid 452 } 453 return b[0:nbytes] 454 } 455 456 // Repeat returns a new byte slice consisting of count copies of b. 457 // 458 // It panics if count is negative or if 459 // the result of (len(b) * count) overflows. 460 func Repeat(b []byte, count int) []byte { 461 // Since we cannot return an error on overflow, 462 // we should panic if the repeat will generate 463 // an overflow. 464 // See Issue golang.org/issue/16237. 
465 if count < 0 { 466 panic("bytes: negative Repeat count") 467 } else if count > 0 && len(b)*count/count != len(b) { 468 panic("bytes: Repeat count causes overflow") 469 } 470 471 nb := make([]byte, len(b)*count) 472 bp := copy(nb, b) 473 for bp < len(nb) { 474 copy(nb[bp:], nb[:bp]) 475 bp *= 2 476 } 477 return nb 478 } 479 480 // ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case. 481 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } 482 483 // ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case. 484 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } 485 486 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. 487 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } 488 489 // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 490 // upper case, giving priority to the special casing rules. 491 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { 492 return Map(c.ToUpper, s) 493 } 494 495 // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 496 // lower case, giving priority to the special casing rules. 497 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { 498 return Map(c.ToLower, s) 499 } 500 501 // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 502 // title case, giving priority to the special casing rules. 503 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { 504 return Map(c.ToTitle, s) 505 } 506 507 // isSeparator reports whether the rune could mark a word boundary. 508 // TODO: update when package unicode captures more of the properties. 
509 func isSeparator(r rune) bool { 510 // ASCII alphanumerics and underscore are not separators 511 if r <= 0x7F { 512 switch { 513 case '0' <= r && r <= '9': 514 return false 515 case 'a' <= r && r <= 'z': 516 return false 517 case 'A' <= r && r <= 'Z': 518 return false 519 case r == '_': 520 return false 521 } 522 return true 523 } 524 // Letters and digits are not separators 525 if unicode.IsLetter(r) || unicode.IsDigit(r) { 526 return false 527 } 528 // Otherwise, all we can do for now is treat spaces as separators. 529 return unicode.IsSpace(r) 530 } 531 532 // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin 533 // words mapped to their title case. 534 // 535 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 536 func Title(s []byte) []byte { 537 // Use a closure here to remember state. 538 // Hackish but effective. Depends on Map scanning in order and calling 539 // the closure once per rune. 540 prev := ' ' 541 return Map( 542 func(r rune) rune { 543 if isSeparator(prev) { 544 prev = r 545 return unicode.ToTitle(r) 546 } 547 prev = r 548 return r 549 }, 550 s) 551 } 552 553 // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off 554 // all leading UTF-8-encoded code points c that satisfy f(c). 555 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { 556 i := indexFunc(s, f, false) 557 if i == -1 { 558 return nil 559 } 560 return s[i:] 561 } 562 563 // TrimRightFunc returns a subslice of s by slicing off all trailing 564 // UTF-8-encoded code points c that satisfy f(c). 
565 func TrimRightFunc(s []byte, f func(r rune) bool) []byte { 566 i := lastIndexFunc(s, f, false) 567 if i >= 0 && s[i] >= utf8.RuneSelf { 568 _, wid := utf8.DecodeRune(s[i:]) 569 i += wid 570 } else { 571 i++ 572 } 573 return s[0:i] 574 } 575 576 // TrimFunc returns a subslice of s by slicing off all leading and trailing 577 // UTF-8-encoded code points c that satisfy f(c). 578 func TrimFunc(s []byte, f func(r rune) bool) []byte { 579 return TrimRightFunc(TrimLeftFunc(s, f), f) 580 } 581 582 // TrimPrefix returns s without the provided leading prefix string. 583 // If s doesn't start with prefix, s is returned unchanged. 584 func TrimPrefix(s, prefix []byte) []byte { 585 if HasPrefix(s, prefix) { 586 return s[len(prefix):] 587 } 588 return s 589 } 590 591 // TrimSuffix returns s without the provided trailing suffix string. 592 // If s doesn't end with suffix, s is returned unchanged. 593 func TrimSuffix(s, suffix []byte) []byte { 594 if HasSuffix(s, suffix) { 595 return s[:len(s)-len(suffix)] 596 } 597 return s 598 } 599 600 // IndexFunc interprets s as a sequence of UTF-8-encoded code points. 601 // It returns the byte index in s of the first Unicode 602 // code point satisfying f(c), or -1 if none do. 603 func IndexFunc(s []byte, f func(r rune) bool) int { 604 return indexFunc(s, f, true) 605 } 606 607 // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. 608 // It returns the byte index in s of the last Unicode 609 // code point satisfying f(c), or -1 if none do. 610 func LastIndexFunc(s []byte, f func(r rune) bool) int { 611 return lastIndexFunc(s, f, true) 612 } 613 614 // indexFunc is the same as IndexFunc except that if 615 // truth==false, the sense of the predicate function is 616 // inverted. 
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
	start := 0
	for start < len(s) {
		wid := 1
		r := rune(s[start])
		if r >= utf8.RuneSelf {
			// Not a single-byte rune: decode the full sequence.
			r, wid = utf8.DecodeRune(s[start:])
		}
		if f(r) == truth {
			return start
		}
		start += wid
	}
	return -1
}

// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
	for i := len(s); i > 0; {
		r, size := rune(s[i-1]), 1
		if r >= utf8.RuneSelf {
			// The byte before i is not a single-byte rune: decode the
			// last rune of s[0:i].
			r, size = utf8.DecodeLastRune(s[0:i])
		}
		i -= size
		if f(r) == truth {
			return i
		}
	}
	return -1
}

// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The 128-bits of the lower 16 bytes,
// starting with the least-significant bit of the lowest word to the
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII.
// When ok is false the returned set is only partially filled and must not
// be used.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		// c>>5 selects the 32-bit word, c&31 the bit within that word.
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// contains reports whether c is inside the set.
672 func (as *asciiSet) contains(c byte) bool { 673 return (as[c>>5] & (1 << uint(c&31))) != 0 674 } 675 676 func makeCutsetFunc(cutset string) func(r rune) bool { 677 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 678 return func(r rune) bool { 679 return r == rune(cutset[0]) 680 } 681 } 682 if as, isASCII := makeASCIISet(cutset); isASCII { 683 return func(r rune) bool { 684 return r < utf8.RuneSelf && as.contains(byte(r)) 685 } 686 } 687 return func(r rune) bool { 688 for _, c := range cutset { 689 if c == r { 690 return true 691 } 692 } 693 return false 694 } 695 } 696 697 // Trim returns a subslice of s by slicing off all leading and 698 // trailing UTF-8-encoded code points contained in cutset. 699 func Trim(s []byte, cutset string) []byte { 700 return TrimFunc(s, makeCutsetFunc(cutset)) 701 } 702 703 // TrimLeft returns a subslice of s by slicing off all leading 704 // UTF-8-encoded code points contained in cutset. 705 func TrimLeft(s []byte, cutset string) []byte { 706 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 707 } 708 709 // TrimRight returns a subslice of s by slicing off all trailing 710 // UTF-8-encoded code points that are contained in cutset. 711 func TrimRight(s []byte, cutset string) []byte { 712 return TrimRightFunc(s, makeCutsetFunc(cutset)) 713 } 714 715 // TrimSpace returns a subslice of s by slicing off all leading and 716 // trailing white space, as defined by Unicode. 717 func TrimSpace(s []byte) []byte { 718 return TrimFunc(s, unicode.IsSpace) 719 } 720 721 // Runes interprets s as a sequence of UTF-8-encoded code points. 722 // It returns a slice of runes (Unicode code points) equivalent to s. 723 func Runes(s []byte) []rune { 724 t := make([]rune, utf8.RuneCount(s)) 725 i := 0 726 for len(s) > 0 { 727 r, l := utf8.DecodeRune(s) 728 t[i] = r 729 i++ 730 s = s[l:] 731 } 732 return t 733 } 734 735 // Replace returns a copy of the slice s with the first n 736 // non-overlapping instances of old replaced by new. 
737 // If old is empty, it matches at the beginning of the slice 738 // and after each UTF-8 sequence, yielding up to k+1 replacements 739 // for a k-rune slice. 740 // If n < 0, there is no limit on the number of replacements. 741 func Replace(s, old, new []byte, n int) []byte { 742 m := 0 743 if n != 0 { 744 // Compute number of replacements. 745 m = Count(s, old) 746 } 747 if m == 0 { 748 // Just return a copy. 749 return append([]byte(nil), s...) 750 } 751 if n < 0 || m < n { 752 n = m 753 } 754 755 // Apply replacements to buffer. 756 t := make([]byte, len(s)+n*(len(new)-len(old))) 757 w := 0 758 start := 0 759 for i := 0; i < n; i++ { 760 j := start 761 if len(old) == 0 { 762 if i > 0 { 763 _, wid := utf8.DecodeRune(s[start:]) 764 j += wid 765 } 766 } else { 767 j += Index(s[start:], old) 768 } 769 w += copy(t[w:], s[start:j]) 770 w += copy(t[w:], new) 771 start = j + len(old) 772 } 773 w += copy(t[w:], s[start:]) 774 return t[0:w] 775 } 776 777 // ReplaceAll returns a copy of the slice s with all 778 // non-overlapping instances of old replaced by new. 779 // If old is empty, it matches at the beginning of the slice 780 // and after each UTF-8 sequence, yielding up to k+1 replacements 781 // for a k-rune slice. 782 func ReplaceAll(s, old, new []byte) []byte { 783 return Replace(s, old, new, -1) 784 } 785 786 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 787 // are equal under Unicode case-folding. 788 func EqualFold(s, t []byte) bool { 789 for len(s) != 0 && len(t) != 0 { 790 // Extract first rune from each. 791 var sr, tr rune 792 if s[0] < utf8.RuneSelf { 793 sr, s = rune(s[0]), s[1:] 794 } else { 795 r, size := utf8.DecodeRune(s) 796 sr, s = r, s[size:] 797 } 798 if t[0] < utf8.RuneSelf { 799 tr, t = rune(t[0]), t[1:] 800 } else { 801 r, size := utf8.DecodeRune(t) 802 tr, t = r, t[size:] 803 } 804 805 // If they match, keep going; if not, return false. 806 807 // Easy case. 
808 if tr == sr { 809 continue 810 } 811 812 // Make sr < tr to simplify what follows. 813 if tr < sr { 814 tr, sr = sr, tr 815 } 816 // Fast check for ASCII. 817 if tr < utf8.RuneSelf { 818 // ASCII only, sr/tr must be upper/lower case 819 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' { 820 continue 821 } 822 return false 823 } 824 825 // General case. SimpleFold(x) returns the next equivalent rune > x 826 // or wraps around to smaller values. 827 r := unicode.SimpleFold(sr) 828 for r != sr && r < tr { 829 r = unicode.SimpleFold(r) 830 } 831 if r == tr { 832 continue 833 } 834 return false 835 } 836 837 // One string is empty. Are both? 838 return len(s) == len(t) 839 } 840 841 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. 842 func Index(s, sep []byte) int { 843 n := len(sep) 844 switch { 845 case n == 0: 846 return 0 847 case n == 1: 848 return IndexByte(s, sep[0]) 849 case n == len(s): 850 if Equal(sep, s) { 851 return 0 852 } 853 return -1 854 case n > len(s): 855 return -1 856 case n <= bytealg.MaxLen: 857 // Use brute force when s and sep both are small 858 if len(s) <= bytealg.MaxBruteForce { 859 return bytealg.Index(s, sep) 860 } 861 c0 := sep[0] 862 c1 := sep[1] 863 i := 0 864 t := len(s) - n + 1 865 fails := 0 866 for i < t { 867 if s[i] != c0 { 868 // IndexByte is faster than bytealg.Index, so use it as long as 869 // we're not getting lots of false positives. 870 o := IndexByte(s[i:t], c0) 871 if o < 0 { 872 return -1 873 } 874 i += o 875 } 876 if s[i+1] == c1 && Equal(s[i:i+n], sep) { 877 return i 878 } 879 fails++ 880 i++ 881 // Switch to bytealg.Index when IndexByte produces too many false positives. 
882 if fails > bytealg.Cutover(i) { 883 r := bytealg.Index(s[i:], sep) 884 if r >= 0 { 885 return r + i 886 } 887 return -1 888 } 889 } 890 return -1 891 } 892 c0 := sep[0] 893 c1 := sep[1] 894 i := 0 895 fails := 0 896 t := len(s) - n + 1 897 for i < t { 898 if s[i] != c0 { 899 o := IndexByte(s[i:t], c0) 900 if o < 0 { 901 break 902 } 903 i += o 904 } 905 if s[i+1] == c1 && Equal(s[i:i+n], sep) { 906 return i 907 } 908 i++ 909 fails++ 910 if fails >= 4+i>>4 && i < t { 911 // Give up on IndexByte, it isn't skipping ahead 912 // far enough to be better than Rabin-Karp. 913 // Experiments (using IndexPeriodic) suggest 914 // the cutover is about 16 byte skips. 915 // TODO: if large prefixes of sep are matching 916 // we should cutover at even larger average skips, 917 // because Equal becomes that much more expensive. 918 // This code does not take that effect into account. 919 j := indexRabinKarp(s[i:], sep) 920 if j < 0 { 921 return -1 922 } 923 return i + j 924 } 925 } 926 return -1 927 } 928 929 func indexRabinKarp(s, sep []byte) int { 930 // Rabin-Karp search 931 hashsep, pow := hashStr(sep) 932 n := len(sep) 933 var h uint32 934 for i := 0; i < n; i++ { 935 h = h*primeRK + uint32(s[i]) 936 } 937 if h == hashsep && Equal(s[:n], sep) { 938 return 0 939 } 940 for i := n; i < len(s); { 941 h *= primeRK 942 h += uint32(s[i]) 943 h -= pow * uint32(s[i-n]) 944 i++ 945 if h == hashsep && Equal(s[i-n:i], sep) { 946 return i - n 947 } 948 } 949 return -1 950 } 951 952 // primeRK is the prime base used in Rabin-Karp algorithm. 953 const primeRK = 16777619 954 955 // hashStr returns the hash and the appropriate multiplicative 956 // factor for use in Rabin-Karp algorithm. 
957 func hashStr(sep []byte) (uint32, uint32) { 958 hash := uint32(0) 959 for i := 0; i < len(sep); i++ { 960 hash = hash*primeRK + uint32(sep[i]) 961 } 962 var pow, sq uint32 = 1, primeRK 963 for i := len(sep); i > 0; i >>= 1 { 964 if i&1 != 0 { 965 pow *= sq 966 } 967 sq *= sq 968 } 969 return hash, pow 970 }