github.com/euank/go@v0.0.0-20160829210321-495514729181/src/bytes/bytes.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bytes implements functions for the manipulation of byte slices. 6 // It is analogous to the facilities of the strings package. 7 package bytes 8 9 import ( 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 func equalPortable(a, b []byte) bool { 15 if len(a) != len(b) { 16 return false 17 } 18 for i, c := range a { 19 if c != b[i] { 20 return false 21 } 22 } 23 return true 24 } 25 26 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes), 27 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes. 28 func explode(s []byte, n int) [][]byte { 29 if n <= 0 { 30 n = len(s) 31 } 32 a := make([][]byte, n) 33 var size int 34 na := 0 35 for len(s) > 0 { 36 if na+1 >= n { 37 a[na] = s 38 na++ 39 break 40 } 41 _, size = utf8.DecodeRune(s) 42 a[na] = s[0:size] 43 s = s[size:] 44 na++ 45 } 46 return a[0:na] 47 } 48 49 // Count counts the number of non-overlapping instances of sep in s. 50 // If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. 51 func Count(s, sep []byte) int { 52 n := len(sep) 53 if n == 0 { 54 return utf8.RuneCount(s) + 1 55 } 56 if n > len(s) { 57 return 0 58 } 59 count := 0 60 c := sep[0] 61 i := 0 62 t := s[:len(s)-n+1] 63 for i < len(t) { 64 if t[i] != c { 65 o := IndexByte(t[i:], c) 66 if o < 0 { 67 break 68 } 69 i += o 70 } 71 if n == 1 || Equal(s[i:i+n], sep) { 72 count++ 73 i += n 74 continue 75 } 76 i++ 77 } 78 return count 79 } 80 81 // Contains reports whether subslice is within b. 82 func Contains(b, subslice []byte) bool { 83 return Index(b, subslice) != -1 84 } 85 86 // ContainsAny reports whether any of the UTF-8-encoded Unicode code points in chars are within b. 
87 func ContainsAny(b []byte, chars string) bool { 88 return IndexAny(b, chars) >= 0 89 } 90 91 // ContainsRune reports whether the Unicode code point r is within b. 92 func ContainsRune(b []byte, r rune) bool { 93 return IndexRune(b, r) >= 0 94 } 95 96 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. 97 func Index(s, sep []byte) int { 98 n := len(sep) 99 if n == 0 { 100 return 0 101 } 102 if n > len(s) { 103 return -1 104 } 105 c := sep[0] 106 if n == 1 { 107 return IndexByte(s, c) 108 } 109 i := 0 110 t := s[:len(s)-n+1] 111 for i < len(t) { 112 if t[i] != c { 113 o := IndexByte(t[i:], c) 114 if o < 0 { 115 break 116 } 117 i += o 118 } 119 if Equal(s[i:i+n], sep) { 120 return i 121 } 122 i++ 123 } 124 return -1 125 } 126 127 func indexBytePortable(s []byte, c byte) int { 128 for i, b := range s { 129 if b == c { 130 return i 131 } 132 } 133 return -1 134 } 135 136 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. 137 func LastIndex(s, sep []byte) int { 138 n := len(sep) 139 if n == 0 { 140 return len(s) 141 } 142 c := sep[0] 143 for i := len(s) - n; i >= 0; i-- { 144 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { 145 return i 146 } 147 } 148 return -1 149 } 150 151 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 152 func LastIndexByte(s []byte, c byte) int { 153 for i := len(s) - 1; i >= 0; i-- { 154 if s[i] == c { 155 return i 156 } 157 } 158 return -1 159 } 160 161 // IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. 162 // It returns the byte index of the first occurrence in s of the given rune. 163 // It returns -1 if rune is not present in s. 
func IndexRune(s []byte, r rune) int {
	for i := 0; i < len(s); {
		r1, size := utf8.DecodeRune(s[i:])
		if r == r1 {
			return i
		}
		i += size
	}
	return -1
}

// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of any of the Unicode
// code points in chars. It returns -1 if chars is empty or if there is no code
// point in common.
func IndexAny(s []byte, chars string) int {
	if len(chars) > 0 {
		var r rune
		var width int
		for i := 0; i < len(s); i += width {
			r = rune(s[i])
			if r < utf8.RuneSelf {
				// Single-byte (ASCII) rune: no decoding needed.
				width = 1
			} else {
				r, width = utf8.DecodeRune(s[i:])
			}
			for _, ch := range chars {
				if r == ch {
					return i
				}
			}
		}
	}
	return -1
}

// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
// points. It returns the byte index of the last occurrence in s of any of
// the Unicode code points in chars. It returns -1 if chars is empty or if
// there is no code point in common.
func LastIndexAny(s []byte, chars string) int {
	if len(chars) > 0 {
		for i := len(s); i > 0; {
			r, size := utf8.DecodeLastRune(s[0:i])
			// Step back first so that i is the start index of the rune just decoded.
			i -= size
			for _, ch := range chars {
				if r == ch {
					return i
				}
			}
		}
	}
	return -1
}

// genSplit is the generic split: it splits s after each instance of sep,
// including sepSave bytes of sep in the subslices.
221 func genSplit(s, sep []byte, sepSave, n int) [][]byte { 222 if n == 0 { 223 return nil 224 } 225 if len(sep) == 0 { 226 return explode(s, n) 227 } 228 if n < 0 { 229 n = Count(s, sep) + 1 230 } 231 c := sep[0] 232 start := 0 233 a := make([][]byte, n) 234 na := 0 235 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { 236 if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) { 237 a[na] = s[start : i+sepSave] 238 na++ 239 start = i + len(sep) 240 i += len(sep) - 1 241 } 242 } 243 a[na] = s[start:] 244 return a[0 : na+1] 245 } 246 247 // SplitN slices s into subslices separated by sep and returns a slice of 248 // the subslices between those separators. 249 // If sep is empty, SplitN splits after each UTF-8 sequence. 250 // The count determines the number of subslices to return: 251 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 252 // n == 0: the result is nil (zero subslices) 253 // n < 0: all subslices 254 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } 255 256 // SplitAfterN slices s into subslices after each instance of sep and 257 // returns a slice of those subslices. 258 // If sep is empty, SplitAfterN splits after each UTF-8 sequence. 259 // The count determines the number of subslices to return: 260 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 261 // n == 0: the result is nil (zero subslices) 262 // n < 0: all subslices 263 func SplitAfterN(s, sep []byte, n int) [][]byte { 264 return genSplit(s, sep, len(sep), n) 265 } 266 267 // Split slices s into all subslices separated by sep and returns a slice of 268 // the subslices between those separators. 269 // If sep is empty, Split splits after each UTF-8 sequence. 270 // It is equivalent to SplitN with a count of -1. 
271 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } 272 273 // SplitAfter slices s into all subslices after each instance of sep and 274 // returns a slice of those subslices. 275 // If sep is empty, SplitAfter splits after each UTF-8 sequence. 276 // It is equivalent to SplitAfterN with a count of -1. 277 func SplitAfter(s, sep []byte) [][]byte { 278 return genSplit(s, sep, len(sep), -1) 279 } 280 281 // Fields splits the slice s around each instance of one or more consecutive white space 282 // characters, returning a slice of subslices of s or an empty list if s contains only white space. 283 func Fields(s []byte) [][]byte { 284 return FieldsFunc(s, unicode.IsSpace) 285 } 286 287 // FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. 288 // It splits the slice s at each run of code points c satisfying f(c) and 289 // returns a slice of subslices of s. If all code points in s satisfy f(c), or 290 // len(s) == 0, an empty slice is returned. 291 // FieldsFunc makes no guarantees about the order in which it calls f(c). 292 // If f does not return consistent results for a given c, FieldsFunc may crash. 293 func FieldsFunc(s []byte, f func(rune) bool) [][]byte { 294 n := 0 295 inField := false 296 for i := 0; i < len(s); { 297 r, size := utf8.DecodeRune(s[i:]) 298 wasInField := inField 299 inField = !f(r) 300 if inField && !wasInField { 301 n++ 302 } 303 i += size 304 } 305 306 a := make([][]byte, n) 307 na := 0 308 fieldStart := -1 309 for i := 0; i <= len(s) && na < n; { 310 r, size := utf8.DecodeRune(s[i:]) 311 if fieldStart < 0 && size > 0 && !f(r) { 312 fieldStart = i 313 i += size 314 continue 315 } 316 if fieldStart >= 0 && (size == 0 || f(r)) { 317 a[na] = s[fieldStart:i] 318 na++ 319 fieldStart = -1 320 } 321 if size == 0 { 322 break 323 } 324 i += size 325 } 326 return a[0:na] 327 } 328 329 // Join concatenates the elements of s to create a new byte slice. 
The separator 330 // sep is placed between elements in the resulting slice. 331 func Join(s [][]byte, sep []byte) []byte { 332 if len(s) == 0 { 333 return []byte{} 334 } 335 if len(s) == 1 { 336 // Just return a copy. 337 return append([]byte(nil), s[0]...) 338 } 339 n := len(sep) * (len(s) - 1) 340 for _, v := range s { 341 n += len(v) 342 } 343 344 b := make([]byte, n) 345 bp := copy(b, s[0]) 346 for _, v := range s[1:] { 347 bp += copy(b[bp:], sep) 348 bp += copy(b[bp:], v) 349 } 350 return b 351 } 352 353 // HasPrefix tests whether the byte slice s begins with prefix. 354 func HasPrefix(s, prefix []byte) bool { 355 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) 356 } 357 358 // HasSuffix tests whether the byte slice s ends with suffix. 359 func HasSuffix(s, suffix []byte) bool { 360 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) 361 } 362 363 // Map returns a copy of the byte slice s with all its characters modified 364 // according to the mapping function. If mapping returns a negative value, the character is 365 // dropped from the string with no replacement. The characters in s and the 366 // output are interpreted as UTF-8-encoded Unicode code points. 367 func Map(mapping func(r rune) rune, s []byte) []byte { 368 // In the worst case, the slice can grow when mapped, making 369 // things unpleasant. But it's so rare we barge in assuming it's 370 // fine. It could also shrink but that falls out naturally. 371 maxbytes := len(s) // length of b 372 nbytes := 0 // number of bytes encoded in b 373 b := make([]byte, maxbytes) 374 for i := 0; i < len(s); { 375 wid := 1 376 r := rune(s[i]) 377 if r >= utf8.RuneSelf { 378 r, wid = utf8.DecodeRune(s[i:]) 379 } 380 r = mapping(r) 381 if r >= 0 { 382 rl := utf8.RuneLen(r) 383 if rl < 0 { 384 rl = len(string(utf8.RuneError)) 385 } 386 if nbytes+rl > maxbytes { 387 // Grow the buffer. 
388 maxbytes = maxbytes*2 + utf8.UTFMax 389 nb := make([]byte, maxbytes) 390 copy(nb, b[0:nbytes]) 391 b = nb 392 } 393 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) 394 } 395 i += wid 396 } 397 return b[0:nbytes] 398 } 399 400 // Repeat returns a new byte slice consisting of count copies of b. 401 func Repeat(b []byte, count int) []byte { 402 nb := make([]byte, len(b)*count) 403 bp := copy(nb, b) 404 for bp < len(nb) { 405 copy(nb[bp:], nb[:bp]) 406 bp *= 2 407 } 408 return nb 409 } 410 411 // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case. 412 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } 413 414 // ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case. 415 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } 416 417 // ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case. 418 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } 419 420 // ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their 421 // upper case, giving priority to the special casing rules. 422 func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { 423 return Map(func(r rune) rune { return _case.ToUpper(r) }, s) 424 } 425 426 // ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their 427 // lower case, giving priority to the special casing rules. 428 func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { 429 return Map(func(r rune) rune { return _case.ToLower(r) }, s) 430 } 431 432 // ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their 433 // title case, giving priority to the special casing rules. 
434 func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { 435 return Map(func(r rune) rune { return _case.ToTitle(r) }, s) 436 } 437 438 // isSeparator reports whether the rune could mark a word boundary. 439 // TODO: update when package unicode captures more of the properties. 440 func isSeparator(r rune) bool { 441 // ASCII alphanumerics and underscore are not separators 442 if r <= 0x7F { 443 switch { 444 case '0' <= r && r <= '9': 445 return false 446 case 'a' <= r && r <= 'z': 447 return false 448 case 'A' <= r && r <= 'Z': 449 return false 450 case r == '_': 451 return false 452 } 453 return true 454 } 455 // Letters and digits are not separators 456 if unicode.IsLetter(r) || unicode.IsDigit(r) { 457 return false 458 } 459 // Otherwise, all we can do for now is treat spaces as separators. 460 return unicode.IsSpace(r) 461 } 462 463 // Title returns a copy of s with all Unicode letters that begin words 464 // mapped to their title case. 465 // 466 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 467 func Title(s []byte) []byte { 468 // Use a closure here to remember state. 469 // Hackish but effective. Depends on Map scanning in order and calling 470 // the closure once per rune. 471 prev := ' ' 472 return Map( 473 func(r rune) rune { 474 if isSeparator(prev) { 475 prev = r 476 return unicode.ToTitle(r) 477 } 478 prev = r 479 return r 480 }, 481 s) 482 } 483 484 // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded 485 // Unicode code points c that satisfy f(c). 486 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { 487 i := indexFunc(s, f, false) 488 if i == -1 { 489 return nil 490 } 491 return s[i:] 492 } 493 494 // TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 495 // encoded Unicode code points c that satisfy f(c). 
496 func TrimRightFunc(s []byte, f func(r rune) bool) []byte { 497 i := lastIndexFunc(s, f, false) 498 if i >= 0 && s[i] >= utf8.RuneSelf { 499 _, wid := utf8.DecodeRune(s[i:]) 500 i += wid 501 } else { 502 i++ 503 } 504 return s[0:i] 505 } 506 507 // TrimFunc returns a subslice of s by slicing off all leading and trailing 508 // UTF-8-encoded Unicode code points c that satisfy f(c). 509 func TrimFunc(s []byte, f func(r rune) bool) []byte { 510 return TrimRightFunc(TrimLeftFunc(s, f), f) 511 } 512 513 // TrimPrefix returns s without the provided leading prefix string. 514 // If s doesn't start with prefix, s is returned unchanged. 515 func TrimPrefix(s, prefix []byte) []byte { 516 if HasPrefix(s, prefix) { 517 return s[len(prefix):] 518 } 519 return s 520 } 521 522 // TrimSuffix returns s without the provided trailing suffix string. 523 // If s doesn't end with suffix, s is returned unchanged. 524 func TrimSuffix(s, suffix []byte) []byte { 525 if HasSuffix(s, suffix) { 526 return s[:len(s)-len(suffix)] 527 } 528 return s 529 } 530 531 // IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. 532 // It returns the byte index in s of the first Unicode 533 // code point satisfying f(c), or -1 if none do. 534 func IndexFunc(s []byte, f func(r rune) bool) int { 535 return indexFunc(s, f, true) 536 } 537 538 // LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. 539 // It returns the byte index in s of the last Unicode 540 // code point satisfying f(c), or -1 if none do. 541 func LastIndexFunc(s []byte, f func(r rune) bool) int { 542 return lastIndexFunc(s, f, true) 543 } 544 545 // indexFunc is the same as IndexFunc except that if 546 // truth==false, the sense of the predicate function is 547 // inverted. 
548 func indexFunc(s []byte, f func(r rune) bool, truth bool) int { 549 start := 0 550 for start < len(s) { 551 wid := 1 552 r := rune(s[start]) 553 if r >= utf8.RuneSelf { 554 r, wid = utf8.DecodeRune(s[start:]) 555 } 556 if f(r) == truth { 557 return start 558 } 559 start += wid 560 } 561 return -1 562 } 563 564 // lastIndexFunc is the same as LastIndexFunc except that if 565 // truth==false, the sense of the predicate function is 566 // inverted. 567 func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int { 568 for i := len(s); i > 0; { 569 r, size := rune(s[i-1]), 1 570 if r >= utf8.RuneSelf { 571 r, size = utf8.DecodeLastRune(s[0:i]) 572 } 573 i -= size 574 if f(r) == truth { 575 return i 576 } 577 } 578 return -1 579 } 580 581 func makeCutsetFunc(cutset string) func(r rune) bool { 582 return func(r rune) bool { 583 for _, c := range cutset { 584 if c == r { 585 return true 586 } 587 } 588 return false 589 } 590 } 591 592 // Trim returns a subslice of s by slicing off all leading and 593 // trailing UTF-8-encoded Unicode code points contained in cutset. 594 func Trim(s []byte, cutset string) []byte { 595 return TrimFunc(s, makeCutsetFunc(cutset)) 596 } 597 598 // TrimLeft returns a subslice of s by slicing off all leading 599 // UTF-8-encoded Unicode code points contained in cutset. 600 func TrimLeft(s []byte, cutset string) []byte { 601 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 602 } 603 604 // TrimRight returns a subslice of s by slicing off all trailing 605 // UTF-8-encoded Unicode code points that are contained in cutset. 606 func TrimRight(s []byte, cutset string) []byte { 607 return TrimRightFunc(s, makeCutsetFunc(cutset)) 608 } 609 610 // TrimSpace returns a subslice of s by slicing off all leading and 611 // trailing white space, as defined by Unicode. 612 func TrimSpace(s []byte) []byte { 613 return TrimFunc(s, unicode.IsSpace) 614 } 615 616 // Runes returns a slice of runes (Unicode code points) equivalent to s. 
617 func Runes(s []byte) []rune { 618 t := make([]rune, utf8.RuneCount(s)) 619 i := 0 620 for len(s) > 0 { 621 r, l := utf8.DecodeRune(s) 622 t[i] = r 623 i++ 624 s = s[l:] 625 } 626 return t 627 } 628 629 // Replace returns a copy of the slice s with the first n 630 // non-overlapping instances of old replaced by new. 631 // If old is empty, it matches at the beginning of the slice 632 // and after each UTF-8 sequence, yielding up to k+1 replacements 633 // for a k-rune slice. 634 // If n < 0, there is no limit on the number of replacements. 635 func Replace(s, old, new []byte, n int) []byte { 636 m := 0 637 if n != 0 { 638 // Compute number of replacements. 639 m = Count(s, old) 640 } 641 if m == 0 { 642 // Just return a copy. 643 return append([]byte(nil), s...) 644 } 645 if n < 0 || m < n { 646 n = m 647 } 648 649 // Apply replacements to buffer. 650 t := make([]byte, len(s)+n*(len(new)-len(old))) 651 w := 0 652 start := 0 653 for i := 0; i < n; i++ { 654 j := start 655 if len(old) == 0 { 656 if i > 0 { 657 _, wid := utf8.DecodeRune(s[start:]) 658 j += wid 659 } 660 } else { 661 j += Index(s[start:], old) 662 } 663 w += copy(t[w:], s[start:j]) 664 w += copy(t[w:], new) 665 start = j + len(old) 666 } 667 w += copy(t[w:], s[start:]) 668 return t[0:w] 669 } 670 671 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 672 // are equal under Unicode case-folding. 673 func EqualFold(s, t []byte) bool { 674 for len(s) != 0 && len(t) != 0 { 675 // Extract first rune from each. 676 var sr, tr rune 677 if s[0] < utf8.RuneSelf { 678 sr, s = rune(s[0]), s[1:] 679 } else { 680 r, size := utf8.DecodeRune(s) 681 sr, s = r, s[size:] 682 } 683 if t[0] < utf8.RuneSelf { 684 tr, t = rune(t[0]), t[1:] 685 } else { 686 r, size := utf8.DecodeRune(t) 687 tr, t = r, t[size:] 688 } 689 690 // If they match, keep going; if not, return false. 691 692 // Easy case. 693 if tr == sr { 694 continue 695 } 696 697 // Make sr < tr to simplify what follows. 
698 if tr < sr { 699 tr, sr = sr, tr 700 } 701 // Fast check for ASCII. 702 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { 703 // ASCII, and sr is upper case. tr must be lower case. 704 if tr == sr+'a'-'A' { 705 continue 706 } 707 return false 708 } 709 710 // General case. SimpleFold(x) returns the next equivalent rune > x 711 // or wraps around to smaller values. 712 r := unicode.SimpleFold(sr) 713 for r != sr && r < tr { 714 r = unicode.SimpleFold(r) 715 } 716 if r == tr { 717 continue 718 } 719 return false 720 } 721 722 // One string is empty. Are both? 723 return len(s) == len(t) 724 }