github.com/c9s/go@v0.0.0-20180120015821-984e81f64e0c/src/bytes/bytes.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bytes implements functions for the manipulation of byte slices. 6 // It is analogous to the facilities of the strings package. 7 package bytes 8 9 import ( 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 func equalPortable(a, b []byte) bool { 15 if len(a) != len(b) { 16 return false 17 } 18 for i, c := range a { 19 if c != b[i] { 20 return false 21 } 22 } 23 return true 24 } 25 26 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes), 27 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes. 28 func explode(s []byte, n int) [][]byte { 29 if n <= 0 { 30 n = len(s) 31 } 32 a := make([][]byte, n) 33 var size int 34 na := 0 35 for len(s) > 0 { 36 if na+1 >= n { 37 a[na] = s 38 na++ 39 break 40 } 41 _, size = utf8.DecodeRune(s) 42 a[na] = s[0:size:size] 43 s = s[size:] 44 na++ 45 } 46 return a[0:na] 47 } 48 49 // countGeneric actually implements Count 50 func countGeneric(s, sep []byte) int { 51 // special case 52 if len(sep) == 0 { 53 return utf8.RuneCount(s) + 1 54 } 55 n := 0 56 for { 57 i := Index(s, sep) 58 if i == -1 { 59 return n 60 } 61 n++ 62 s = s[i+len(sep):] 63 } 64 } 65 66 // Contains reports whether subslice is within b. 67 func Contains(b, subslice []byte) bool { 68 return Index(b, subslice) != -1 69 } 70 71 // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. 72 func ContainsAny(b []byte, chars string) bool { 73 return IndexAny(b, chars) >= 0 74 } 75 76 // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. 77 func ContainsRune(b []byte, r rune) bool { 78 return IndexRune(b, r) >= 0 79 } 80 81 func indexBytePortable(s []byte, c byte) int { 82 for i, b := range s { 83 if b == c { 84 return i 85 } 86 } 87 return -1 88 } 89 90 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. 91 func LastIndex(s, sep []byte) int { 92 n := len(sep) 93 if n == 0 { 94 return len(s) 95 } 96 c := sep[0] 97 for i := len(s) - n; i >= 0; i-- { 98 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { 99 return i 100 } 101 } 102 return -1 103 } 104 105 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 106 func LastIndexByte(s []byte, c byte) int { 107 for i := len(s) - 1; i >= 0; i-- { 108 if s[i] == c { 109 return i 110 } 111 } 112 return -1 113 } 114 115 // IndexRune interprets s as a sequence of UTF-8-encoded code points. 116 // It returns the byte index of the first occurrence in s of the given rune. 117 // It returns -1 if rune is not present in s. 118 // If r is utf8.RuneError, it returns the first instance of any 119 // invalid UTF-8 byte sequence. 120 func IndexRune(s []byte, r rune) int { 121 switch { 122 case 0 <= r && r < utf8.RuneSelf: 123 return IndexByte(s, byte(r)) 124 case r == utf8.RuneError: 125 for i := 0; i < len(s); { 126 r1, n := utf8.DecodeRune(s[i:]) 127 if r1 == utf8.RuneError { 128 return i 129 } 130 i += n 131 } 132 return -1 133 case !utf8.ValidRune(r): 134 return -1 135 default: 136 var b [utf8.UTFMax]byte 137 n := utf8.EncodeRune(b[:], r) 138 return Index(s, b[:n]) 139 } 140 } 141 142 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. 143 // It returns the byte index of the first occurrence in s of any of the Unicode 144 // code points in chars. It returns -1 if chars is empty or if there is no code 145 // point in common. 146 func IndexAny(s []byte, chars string) int { 147 if chars == "" { 148 // Avoid scanning all of s. 149 return -1 150 } 151 if len(s) > 8 { 152 if as, isASCII := makeASCIISet(chars); isASCII { 153 for i, c := range s { 154 if as.contains(c) { 155 return i 156 } 157 } 158 return -1 159 } 160 } 161 var width int 162 for i := 0; i < len(s); i += width { 163 r := rune(s[i]) 164 if r < utf8.RuneSelf { 165 width = 1 166 } else { 167 r, width = utf8.DecodeRune(s[i:]) 168 } 169 for _, ch := range chars { 170 if r == ch { 171 return i 172 } 173 } 174 } 175 return -1 176 } 177 178 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code 179 // points. It returns the byte index of the last occurrence in s of any of 180 // the Unicode code points in chars. It returns -1 if chars is empty or if 181 // there is no code point in common. 182 func LastIndexAny(s []byte, chars string) int { 183 if chars == "" { 184 // Avoid scanning all of s. 185 return -1 186 } 187 if len(s) > 8 { 188 if as, isASCII := makeASCIISet(chars); isASCII { 189 for i := len(s) - 1; i >= 0; i-- { 190 if as.contains(s[i]) { 191 return i 192 } 193 } 194 return -1 195 } 196 } 197 for i := len(s); i > 0; { 198 r, size := utf8.DecodeLastRune(s[:i]) 199 i -= size 200 for _, c := range chars { 201 if r == c { 202 return i 203 } 204 } 205 } 206 return -1 207 } 208 209 // Generic split: splits after each instance of sep, 210 // including sepSave bytes of sep in the subslices. 211 func genSplit(s, sep []byte, sepSave, n int) [][]byte { 212 if n == 0 { 213 return nil 214 } 215 if len(sep) == 0 { 216 return explode(s, n) 217 } 218 if n < 0 { 219 n = Count(s, sep) + 1 220 } 221 222 a := make([][]byte, n) 223 n-- 224 i := 0 225 for i < n { 226 m := Index(s, sep) 227 if m < 0 { 228 break 229 } 230 a[i] = s[: m+sepSave : m+sepSave] 231 s = s[m+len(sep):] 232 i++ 233 } 234 a[i] = s 235 return a[:i+1] 236 } 237 238 // SplitN slices s into subslices separated by sep and returns a slice of 239 // the subslices between those separators. 240 // If sep is empty, SplitN splits after each UTF-8 sequence. 241 // The count determines the number of subslices to return: 242 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 243 // n == 0: the result is nil (zero subslices) 244 // n < 0: all subslices 245 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } 246 247 // SplitAfterN slices s into subslices after each instance of sep and 248 // returns a slice of those subslices. 249 // If sep is empty, SplitAfterN splits after each UTF-8 sequence. 250 // The count determines the number of subslices to return: 251 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 252 // n == 0: the result is nil (zero subslices) 253 // n < 0: all subslices 254 func SplitAfterN(s, sep []byte, n int) [][]byte { 255 return genSplit(s, sep, len(sep), n) 256 } 257 258 // Split slices s into all subslices separated by sep and returns a slice of 259 // the subslices between those separators. 260 // If sep is empty, Split splits after each UTF-8 sequence. 261 // It is equivalent to SplitN with a count of -1. 262 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } 263 264 // SplitAfter slices s into all subslices after each instance of sep and 265 // returns a slice of those subslices. 266 // If sep is empty, SplitAfter splits after each UTF-8 sequence. 267 // It is equivalent to SplitAfterN with a count of -1. 268 func SplitAfter(s, sep []byte) [][]byte { 269 return genSplit(s, sep, len(sep), -1) 270 } 271 272 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} 273 274 // Fields interprets s as a sequence of UTF-8-encoded code points. 275 // It splits the slice s around each instance of one or more consecutive white space 276 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an 277 // empty slice if s contains only white space. 278 func Fields(s []byte) [][]byte { 279 // First count the fields. 280 // This is an exact count if s is ASCII, otherwise it is an approximation. 281 n := 0 282 wasSpace := 1 283 // setBits is used to track which bits are set in the bytes of s. 284 setBits := uint8(0) 285 for i := 0; i < len(s); i++ { 286 r := s[i] 287 setBits |= r 288 isSpace := int(asciiSpace[r]) 289 n += wasSpace & ^isSpace 290 wasSpace = isSpace 291 } 292 293 if setBits >= utf8.RuneSelf { 294 // Some runes in the input slice are not ASCII. 295 return FieldsFunc(s, unicode.IsSpace) 296 } 297 298 // ASCII fast path 299 a := make([][]byte, n) 300 na := 0 301 fieldStart := 0 302 i := 0 303 // Skip spaces in the front of the input. 304 for i < len(s) && asciiSpace[s[i]] != 0 { 305 i++ 306 } 307 fieldStart = i 308 for i < len(s) { 309 if asciiSpace[s[i]] == 0 { 310 i++ 311 continue 312 } 313 a[na] = s[fieldStart:i:i] 314 na++ 315 i++ 316 // Skip spaces in between fields. 317 for i < len(s) && asciiSpace[s[i]] != 0 { 318 i++ 319 } 320 fieldStart = i 321 } 322 if fieldStart < len(s) { // Last field might end at EOF. 323 a[na] = s[fieldStart:len(s):len(s)] 324 } 325 return a 326 } 327 328 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points. 329 // It splits the slice s at each run of code points c satisfying f(c) and 330 // returns a slice of subslices of s. If all code points in s satisfy f(c), or 331 // len(s) == 0, an empty slice is returned. 332 // FieldsFunc makes no guarantees about the order in which it calls f(c). 333 // If f does not return consistent results for a given c, FieldsFunc may crash. 334 func FieldsFunc(s []byte, f func(rune) bool) [][]byte { 335 // A span is used to record a slice of s of the form s[start:end]. 336 // The start index is inclusive and the end index is exclusive. 337 type span struct { 338 start int 339 end int 340 } 341 spans := make([]span, 0, 32) 342 343 // Find the field start and end indices. 344 wasField := false 345 fromIndex := 0 346 for i := 0; i < len(s); { 347 size := 1 348 r := rune(s[i]) 349 if r >= utf8.RuneSelf { 350 r, size = utf8.DecodeRune(s[i:]) 351 } 352 if f(r) { 353 if wasField { 354 spans = append(spans, span{start: fromIndex, end: i}) 355 wasField = false 356 } 357 } else { 358 if !wasField { 359 fromIndex = i 360 wasField = true 361 } 362 } 363 i += size 364 } 365 366 // Last field might end at EOF. 367 if wasField { 368 spans = append(spans, span{fromIndex, len(s)}) 369 } 370 371 // Create subslices from recorded field indices. 372 a := make([][]byte, len(spans)) 373 for i, span := range spans { 374 a[i] = s[span.start:span.end:span.end] 375 } 376 377 return a 378 } 379 380 // Join concatenates the elements of s to create a new byte slice. The separator 381 // sep is placed between elements in the resulting slice. 382 func Join(s [][]byte, sep []byte) []byte { 383 if len(s) == 0 { 384 return []byte{} 385 } 386 if len(s) == 1 { 387 // Just return a copy. 388 return append([]byte(nil), s[0]...) 389 } 390 n := len(sep) * (len(s) - 1) 391 for _, v := range s { 392 n += len(v) 393 } 394 395 b := make([]byte, n) 396 bp := copy(b, s[0]) 397 for _, v := range s[1:] { 398 bp += copy(b[bp:], sep) 399 bp += copy(b[bp:], v) 400 } 401 return b 402 } 403 404 // HasPrefix tests whether the byte slice s begins with prefix. 405 func HasPrefix(s, prefix []byte) bool { 406 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) 407 } 408 409 // HasSuffix tests whether the byte slice s ends with suffix. 410 func HasSuffix(s, suffix []byte) bool { 411 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) 412 } 413 414 // Map returns a copy of the byte slice s with all its characters modified 415 // according to the mapping function. If mapping returns a negative value, the character is 416 // dropped from the byte slice with no replacement. The characters in s and the 417 // output are interpreted as UTF-8-encoded code points. 418 func Map(mapping func(r rune) rune, s []byte) []byte { 419 // In the worst case, the slice can grow when mapped, making 420 // things unpleasant. But it's so rare we barge in assuming it's 421 // fine. It could also shrink but that falls out naturally. 422 maxbytes := len(s) // length of b 423 nbytes := 0 // number of bytes encoded in b 424 b := make([]byte, maxbytes) 425 for i := 0; i < len(s); { 426 wid := 1 427 r := rune(s[i]) 428 if r >= utf8.RuneSelf { 429 r, wid = utf8.DecodeRune(s[i:]) 430 } 431 r = mapping(r) 432 if r >= 0 { 433 rl := utf8.RuneLen(r) 434 if rl < 0 { 435 rl = len(string(utf8.RuneError)) 436 } 437 if nbytes+rl > maxbytes { 438 // Grow the buffer. 439 maxbytes = maxbytes*2 + utf8.UTFMax 440 nb := make([]byte, maxbytes) 441 copy(nb, b[0:nbytes]) 442 b = nb 443 } 444 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) 445 } 446 i += wid 447 } 448 return b[0:nbytes] 449 } 450 451 // Repeat returns a new byte slice consisting of count copies of b. 452 // 453 // It panics if count is negative or if 454 // the result of (len(b) * count) overflows. 455 func Repeat(b []byte, count int) []byte { 456 // Since we cannot return an error on overflow, 457 // we should panic if the repeat will generate 458 // an overflow. 459 // See Issue golang.org/issue/16237. 460 if count < 0 { 461 panic("bytes: negative Repeat count") 462 } else if count > 0 && len(b)*count/count != len(b) { 463 panic("bytes: Repeat count causes overflow") 464 } 465 466 nb := make([]byte, len(b)*count) 467 bp := copy(nb, b) 468 for bp < len(nb) { 469 copy(nb[bp:], nb[:bp]) 470 bp *= 2 471 } 472 return nb 473 } 474 475 // ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case. 476 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } 477 478 // ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case. 479 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } 480 481 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. 482 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } 483 484 // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 485 // upper case, giving priority to the special casing rules. 486 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { 487 return Map(func(r rune) rune { return c.ToUpper(r) }, s) 488 } 489 490 // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 491 // lower case, giving priority to the special casing rules. 492 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { 493 return Map(func(r rune) rune { return c.ToLower(r) }, s) 494 } 495 496 // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their 497 // title case, giving priority to the special casing rules. 498 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { 499 return Map(func(r rune) rune { return c.ToTitle(r) }, s) 500 } 501 502 // isSeparator reports whether the rune could mark a word boundary. 503 // TODO: update when package unicode captures more of the properties. 504 func isSeparator(r rune) bool { 505 // ASCII alphanumerics and underscore are not separators 506 if r <= 0x7F { 507 switch { 508 case '0' <= r && r <= '9': 509 return false 510 case 'a' <= r && r <= 'z': 511 return false 512 case 'A' <= r && r <= 'Z': 513 return false 514 case r == '_': 515 return false 516 } 517 return true 518 } 519 // Letters and digits are not separators 520 if unicode.IsLetter(r) || unicode.IsDigit(r) { 521 return false 522 } 523 // Otherwise, all we can do for now is treat spaces as separators. 524 return unicode.IsSpace(r) 525 } 526 527 // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin 528 // words mapped to their title case. 529 // 530 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 531 func Title(s []byte) []byte { 532 // Use a closure here to remember state. 533 // Hackish but effective. Depends on Map scanning in order and calling 534 // the closure once per rune. 535 prev := ' ' 536 return Map( 537 func(r rune) rune { 538 if isSeparator(prev) { 539 prev = r 540 return unicode.ToTitle(r) 541 } 542 prev = r 543 return r 544 }, 545 s) 546 } 547 548 // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off 549 // all leading UTF-8-encoded code points c that satisfy f(c). 550 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { 551 i := indexFunc(s, f, false) 552 if i == -1 { 553 return nil 554 } 555 return s[i:] 556 } 557 558 // TrimRightFunc returns a subslice of s by slicing off all trailing 559 // UTF-8-encoded code points c that satisfy f(c). 560 func TrimRightFunc(s []byte, f func(r rune) bool) []byte { 561 i := lastIndexFunc(s, f, false) 562 if i >= 0 && s[i] >= utf8.RuneSelf { 563 _, wid := utf8.DecodeRune(s[i:]) 564 i += wid 565 } else { 566 i++ 567 } 568 return s[0:i] 569 } 570 571 // TrimFunc returns a subslice of s by slicing off all leading and trailing 572 // UTF-8-encoded code points c that satisfy f(c). 573 func TrimFunc(s []byte, f func(r rune) bool) []byte { 574 return TrimRightFunc(TrimLeftFunc(s, f), f) 575 } 576 577 // TrimPrefix returns s without the provided leading prefix string. 578 // If s doesn't start with prefix, s is returned unchanged. 579 func TrimPrefix(s, prefix []byte) []byte { 580 if HasPrefix(s, prefix) { 581 return s[len(prefix):] 582 } 583 return s 584 } 585 586 // TrimSuffix returns s without the provided trailing suffix string. 587 // If s doesn't end with suffix, s is returned unchanged. 588 func TrimSuffix(s, suffix []byte) []byte { 589 if HasSuffix(s, suffix) { 590 return s[:len(s)-len(suffix)] 591 } 592 return s 593 } 594 595 // IndexFunc interprets s as a sequence of UTF-8-encoded code points. 596 // It returns the byte index in s of the first Unicode 597 // code point satisfying f(c), or -1 if none do. 598 func IndexFunc(s []byte, f func(r rune) bool) int { 599 return indexFunc(s, f, true) 600 } 601 602 // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. 603 // It returns the byte index in s of the last Unicode 604 // code point satisfying f(c), or -1 if none do. 605 func LastIndexFunc(s []byte, f func(r rune) bool) int { 606 return lastIndexFunc(s, f, true) 607 } 608 609 // indexFunc is the same as IndexFunc except that if 610 // truth==false, the sense of the predicate function is 611 // inverted. 612 func indexFunc(s []byte, f func(r rune) bool, truth bool) int { 613 start := 0 614 for start < len(s) { 615 wid := 1 616 r := rune(s[start]) 617 if r >= utf8.RuneSelf { 618 r, wid = utf8.DecodeRune(s[start:]) 619 } 620 if f(r) == truth { 621 return start 622 } 623 start += wid 624 } 625 return -1 626 } 627 628 // lastIndexFunc is the same as LastIndexFunc except that if 629 // truth==false, the sense of the predicate function is 630 // inverted. 631 func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int { 632 for i := len(s); i > 0; { 633 r, size := rune(s[i-1]), 1 634 if r >= utf8.RuneSelf { 635 r, size = utf8.DecodeLastRune(s[0:i]) 636 } 637 i -= size 638 if f(r) == truth { 639 return i 640 } 641 } 642 return -1 643 } 644 645 // asciiSet is a 32-byte value, where each bit represents the presence of a 646 // given ASCII character in the set. The 128-bits of the lower 16 bytes, 647 // starting with the least-significant bit of the lowest word to the 648 // most-significant bit of the highest word, map to the full range of all 649 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, 650 // ensuring that any non-ASCII character will be reported as not in the set. 651 type asciiSet [8]uint32 652 653 // makeASCIISet creates a set of ASCII characters and reports whether all 654 // characters in chars are ASCII. 655 func makeASCIISet(chars string) (as asciiSet, ok bool) { 656 for i := 0; i < len(chars); i++ { 657 c := chars[i] 658 if c >= utf8.RuneSelf { 659 return as, false 660 } 661 as[c>>5] |= 1 << uint(c&31) 662 } 663 return as, true 664 } 665 666 // contains reports whether c is inside the set. 667 func (as *asciiSet) contains(c byte) bool { 668 return (as[c>>5] & (1 << uint(c&31))) != 0 669 } 670 671 func makeCutsetFunc(cutset string) func(r rune) bool { 672 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { 673 return func(r rune) bool { 674 return r == rune(cutset[0]) 675 } 676 } 677 if as, isASCII := makeASCIISet(cutset); isASCII { 678 return func(r rune) bool { 679 return r < utf8.RuneSelf && as.contains(byte(r)) 680 } 681 } 682 return func(r rune) bool { 683 for _, c := range cutset { 684 if c == r { 685 return true 686 } 687 } 688 return false 689 } 690 } 691 692 // Trim returns a subslice of s by slicing off all leading and 693 // trailing UTF-8-encoded code points contained in cutset. 694 func Trim(s []byte, cutset string) []byte { 695 return TrimFunc(s, makeCutsetFunc(cutset)) 696 } 697 698 // TrimLeft returns a subslice of s by slicing off all leading 699 // UTF-8-encoded code points contained in cutset. 700 func TrimLeft(s []byte, cutset string) []byte { 701 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 702 } 703 704 // TrimRight returns a subslice of s by slicing off all trailing 705 // UTF-8-encoded code points that are contained in cutset. 706 func TrimRight(s []byte, cutset string) []byte { 707 return TrimRightFunc(s, makeCutsetFunc(cutset)) 708 } 709 710 // TrimSpace returns a subslice of s by slicing off all leading and 711 // trailing white space, as defined by Unicode. 712 func TrimSpace(s []byte) []byte { 713 return TrimFunc(s, unicode.IsSpace) 714 } 715 716 // Runes interprets s as a sequence of UTF-8-encoded code points. 717 // It returns a slice of runes (Unicode code points) equivalent to s. 718 func Runes(s []byte) []rune { 719 t := make([]rune, utf8.RuneCount(s)) 720 i := 0 721 for len(s) > 0 { 722 r, l := utf8.DecodeRune(s) 723 t[i] = r 724 i++ 725 s = s[l:] 726 } 727 return t 728 } 729 730 // Replace returns a copy of the slice s with the first n 731 // non-overlapping instances of old replaced by new. 732 // If old is empty, it matches at the beginning of the slice 733 // and after each UTF-8 sequence, yielding up to k+1 replacements 734 // for a k-rune slice. 735 // If n < 0, there is no limit on the number of replacements. 736 func Replace(s, old, new []byte, n int) []byte { 737 m := 0 738 if n != 0 { 739 // Compute number of replacements. 740 m = Count(s, old) 741 } 742 if m == 0 { 743 // Just return a copy. 744 return append([]byte(nil), s...) 745 } 746 if n < 0 || m < n { 747 n = m 748 } 749 750 // Apply replacements to buffer. 751 t := make([]byte, len(s)+n*(len(new)-len(old))) 752 w := 0 753 start := 0 754 for i := 0; i < n; i++ { 755 j := start 756 if len(old) == 0 { 757 if i > 0 { 758 _, wid := utf8.DecodeRune(s[start:]) 759 j += wid 760 } 761 } else { 762 j += Index(s[start:], old) 763 } 764 w += copy(t[w:], s[start:j]) 765 w += copy(t[w:], new) 766 start = j + len(old) 767 } 768 w += copy(t[w:], s[start:]) 769 return t[0:w] 770 } 771 772 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 773 // are equal under Unicode case-folding. 774 func EqualFold(s, t []byte) bool { 775 for len(s) != 0 && len(t) != 0 { 776 // Extract first rune from each. 777 var sr, tr rune 778 if s[0] < utf8.RuneSelf { 779 sr, s = rune(s[0]), s[1:] 780 } else { 781 r, size := utf8.DecodeRune(s) 782 sr, s = r, s[size:] 783 } 784 if t[0] < utf8.RuneSelf { 785 tr, t = rune(t[0]), t[1:] 786 } else { 787 r, size := utf8.DecodeRune(t) 788 tr, t = r, t[size:] 789 } 790 791 // If they match, keep going; if not, return false. 792 793 // Easy case. 794 if tr == sr { 795 continue 796 } 797 798 // Make sr < tr to simplify what follows. 799 if tr < sr { 800 tr, sr = sr, tr 801 } 802 // Fast check for ASCII. 803 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { 804 // ASCII, and sr is upper case. tr must be lower case. 805 if tr == sr+'a'-'A' { 806 continue 807 } 808 return false 809 } 810 811 // General case. SimpleFold(x) returns the next equivalent rune > x 812 // or wraps around to smaller values. 813 r := unicode.SimpleFold(sr) 814 for r != sr && r < tr { 815 r = unicode.SimpleFold(r) 816 } 817 if r == tr { 818 continue 819 } 820 return false 821 } 822 823 // One string is empty. Are both? 824 return len(s) == len(t) 825 } 826 827 func indexRabinKarp(s, sep []byte) int { 828 // Rabin-Karp search 829 hashsep, pow := hashStr(sep) 830 n := len(sep) 831 var h uint32 832 for i := 0; i < n; i++ { 833 h = h*primeRK + uint32(s[i]) 834 } 835 if h == hashsep && Equal(s[:n], sep) { 836 return 0 837 } 838 for i := n; i < len(s); { 839 h *= primeRK 840 h += uint32(s[i]) 841 h -= pow * uint32(s[i-n]) 842 i++ 843 if h == hashsep && Equal(s[i-n:i], sep) { 844 return i - n 845 } 846 } 847 return -1 848 } 849 850 // primeRK is the prime base used in Rabin-Karp algorithm. 851 const primeRK = 16777619 852 853 // hashStr returns the hash and the appropriate multiplicative 854 // factor for use in Rabin-Karp algorithm. 855 func hashStr(sep []byte) (uint32, uint32) { 856 hash := uint32(0) 857 for i := 0; i < len(sep); i++ { 858 hash = hash*primeRK + uint32(sep[i]) 859 } 860 var pow, sq uint32 = 1, primeRK 861 for i := len(sep); i > 0; i >>= 1 { 862 if i&1 != 0 { 863 pow *= sq 864 } 865 sq *= sq 866 } 867 return hash, pow 868 }