github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/bytes/bytes.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package bytes implements functions for the manipulation of byte slices. 6 // It is analogous to the facilities of the strings package. 7 package bytes 8 9 import ( 10 "unicode" 11 "unicode/utf8" 12 ) 13 14 func equalPortable(a, b []byte) bool { 15 if len(a) != len(b) { 16 return false 17 } 18 for i, c := range a { 19 if c != b[i] { 20 return false 21 } 22 } 23 return true 24 } 25 26 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes), 27 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes. 28 func explode(s []byte, n int) [][]byte { 29 if n <= 0 { 30 n = len(s) 31 } 32 a := make([][]byte, n) 33 var size int 34 na := 0 35 for len(s) > 0 { 36 if na+1 >= n { 37 a[na] = s 38 na++ 39 break 40 } 41 _, size = utf8.DecodeRune(s) 42 a[na] = s[0:size] 43 s = s[size:] 44 na++ 45 } 46 return a[0:na] 47 } 48 49 // Count counts the number of non-overlapping instances of sep in s. 50 // If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. 51 func Count(s, sep []byte) int { 52 n := len(sep) 53 if n == 0 { 54 return utf8.RuneCount(s) + 1 55 } 56 if n > len(s) { 57 return 0 58 } 59 count := 0 60 c := sep[0] 61 i := 0 62 t := s[:len(s)-n+1] 63 for i < len(t) { 64 if t[i] != c { 65 o := IndexByte(t[i:], c) 66 if o < 0 { 67 break 68 } 69 i += o 70 } 71 if n == 1 || Equal(s[i:i+n], sep) { 72 count++ 73 i += n 74 continue 75 } 76 i++ 77 } 78 return count 79 } 80 81 // Contains reports whether subslice is within b. 82 func Contains(b, subslice []byte) bool { 83 return Index(b, subslice) != -1 84 } 85 86 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. 
87 func Index(s, sep []byte) int { 88 n := len(sep) 89 if n == 0 { 90 return 0 91 } 92 if n > len(s) { 93 return -1 94 } 95 c := sep[0] 96 if n == 1 { 97 return IndexByte(s, c) 98 } 99 i := 0 100 t := s[:len(s)-n+1] 101 for i < len(t) { 102 if t[i] != c { 103 o := IndexByte(t[i:], c) 104 if o < 0 { 105 break 106 } 107 i += o 108 } 109 if Equal(s[i:i+n], sep) { 110 return i 111 } 112 i++ 113 } 114 return -1 115 } 116 117 func indexBytePortable(s []byte, c byte) int { 118 for i, b := range s { 119 if b == c { 120 return i 121 } 122 } 123 return -1 124 } 125 126 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. 127 func LastIndex(s, sep []byte) int { 128 n := len(sep) 129 if n == 0 { 130 return len(s) 131 } 132 c := sep[0] 133 for i := len(s) - n; i >= 0; i-- { 134 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { 135 return i 136 } 137 } 138 return -1 139 } 140 141 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s. 142 func LastIndexByte(s []byte, c byte) int { 143 for i := len(s) - 1; i >= 0; i-- { 144 if s[i] == c { 145 return i 146 } 147 } 148 return -1 149 } 150 151 // IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. 152 // It returns the byte index of the first occurrence in s of the given rune. 153 // It returns -1 if rune is not present in s. 154 func IndexRune(s []byte, r rune) int { 155 for i := 0; i < len(s); { 156 r1, size := utf8.DecodeRune(s[i:]) 157 if r == r1 { 158 return i 159 } 160 i += size 161 } 162 return -1 163 } 164 165 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. 166 // It returns the byte index of the first occurrence in s of any of the Unicode 167 // code points in chars. It returns -1 if chars is empty or if there is no code 168 // point in common. 
169 func IndexAny(s []byte, chars string) int { 170 if len(chars) > 0 { 171 var r rune 172 var width int 173 for i := 0; i < len(s); i += width { 174 r = rune(s[i]) 175 if r < utf8.RuneSelf { 176 width = 1 177 } else { 178 r, width = utf8.DecodeRune(s[i:]) 179 } 180 for _, ch := range chars { 181 if r == ch { 182 return i 183 } 184 } 185 } 186 } 187 return -1 188 } 189 190 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code 191 // points. It returns the byte index of the last occurrence in s of any of 192 // the Unicode code points in chars. It returns -1 if chars is empty or if 193 // there is no code point in common. 194 func LastIndexAny(s []byte, chars string) int { 195 if len(chars) > 0 { 196 for i := len(s); i > 0; { 197 r, size := utf8.DecodeLastRune(s[0:i]) 198 i -= size 199 for _, ch := range chars { 200 if r == ch { 201 return i 202 } 203 } 204 } 205 } 206 return -1 207 } 208 209 // Generic split: splits after each instance of sep, 210 // including sepSave bytes of sep in the subslices. 211 func genSplit(s, sep []byte, sepSave, n int) [][]byte { 212 if n == 0 { 213 return nil 214 } 215 if len(sep) == 0 { 216 return explode(s, n) 217 } 218 if n < 0 { 219 n = Count(s, sep) + 1 220 } 221 c := sep[0] 222 start := 0 223 a := make([][]byte, n) 224 na := 0 225 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ { 226 if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) { 227 a[na] = s[start : i+sepSave] 228 na++ 229 start = i + len(sep) 230 i += len(sep) - 1 231 } 232 } 233 a[na] = s[start:] 234 return a[0 : na+1] 235 } 236 237 // SplitN slices s into subslices separated by sep and returns a slice of 238 // the subslices between those separators. 239 // If sep is empty, SplitN splits after each UTF-8 sequence. 240 // The count determines the number of subslices to return: 241 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 
242 // n == 0: the result is nil (zero subslices) 243 // n < 0: all subslices 244 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } 245 246 // SplitAfterN slices s into subslices after each instance of sep and 247 // returns a slice of those subslices. 248 // If sep is empty, SplitAfterN splits after each UTF-8 sequence. 249 // The count determines the number of subslices to return: 250 // n > 0: at most n subslices; the last subslice will be the unsplit remainder. 251 // n == 0: the result is nil (zero subslices) 252 // n < 0: all subslices 253 func SplitAfterN(s, sep []byte, n int) [][]byte { 254 return genSplit(s, sep, len(sep), n) 255 } 256 257 // Split slices s into all subslices separated by sep and returns a slice of 258 // the subslices between those separators. 259 // If sep is empty, Split splits after each UTF-8 sequence. 260 // It is equivalent to SplitN with a count of -1. 261 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } 262 263 // SplitAfter slices s into all subslices after each instance of sep and 264 // returns a slice of those subslices. 265 // If sep is empty, SplitAfter splits after each UTF-8 sequence. 266 // It is equivalent to SplitAfterN with a count of -1. 267 func SplitAfter(s, sep []byte) [][]byte { 268 return genSplit(s, sep, len(sep), -1) 269 } 270 271 // Fields splits the slice s around each instance of one or more consecutive white space 272 // characters, returning a slice of subslices of s or an empty list if s contains only white space. 273 func Fields(s []byte) [][]byte { 274 return FieldsFunc(s, unicode.IsSpace) 275 } 276 277 // FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. 278 // It splits the slice s at each run of code points c satisfying f(c) and 279 // returns a slice of subslices of s. If all code points in s satisfy f(c), or 280 // len(s) == 0, an empty slice is returned. 
281 // FieldsFunc makes no guarantees about the order in which it calls f(c). 282 // If f does not return consistent results for a given c, FieldsFunc may crash. 283 func FieldsFunc(s []byte, f func(rune) bool) [][]byte { 284 n := 0 285 inField := false 286 for i := 0; i < len(s); { 287 r, size := utf8.DecodeRune(s[i:]) 288 wasInField := inField 289 inField = !f(r) 290 if inField && !wasInField { 291 n++ 292 } 293 i += size 294 } 295 296 a := make([][]byte, n) 297 na := 0 298 fieldStart := -1 299 for i := 0; i <= len(s) && na < n; { 300 r, size := utf8.DecodeRune(s[i:]) 301 if fieldStart < 0 && size > 0 && !f(r) { 302 fieldStart = i 303 i += size 304 continue 305 } 306 if fieldStart >= 0 && (size == 0 || f(r)) { 307 a[na] = s[fieldStart:i] 308 na++ 309 fieldStart = -1 310 } 311 if size == 0 { 312 break 313 } 314 i += size 315 } 316 return a[0:na] 317 } 318 319 // Join concatenates the elements of s to create a new byte slice. The separator 320 // sep is placed between elements in the resulting slice. 321 func Join(s [][]byte, sep []byte) []byte { 322 if len(s) == 0 { 323 return []byte{} 324 } 325 if len(s) == 1 { 326 // Just return a copy. 327 return append([]byte(nil), s[0]...) 328 } 329 n := len(sep) * (len(s) - 1) 330 for _, v := range s { 331 n += len(v) 332 } 333 334 b := make([]byte, n) 335 bp := copy(b, s[0]) 336 for _, v := range s[1:] { 337 bp += copy(b[bp:], sep) 338 bp += copy(b[bp:], v) 339 } 340 return b 341 } 342 343 // HasPrefix tests whether the byte slice s begins with prefix. 344 func HasPrefix(s, prefix []byte) bool { 345 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) 346 } 347 348 // HasSuffix tests whether the byte slice s ends with suffix. 349 func HasSuffix(s, suffix []byte) bool { 350 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) 351 } 352 353 // Map returns a copy of the byte slice s with all its characters modified 354 // according to the mapping function. 
// If mapping returns a negative value, the character is
// dropped from the byte slice with no replacement. The characters in s and the
// output are interpreted as UTF-8-encoded Unicode code points.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// In the worst case, the slice can grow when mapped, making
	// things unpleasant. But it's so rare we barge in assuming it's
	// fine. It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	b := make([]byte, maxbytes)
	for i := 0; i < len(s); {
		wid := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[i:])
		}
		r = mapping(r)
		if r >= 0 {
			rl := utf8.RuneLen(r)
			if rl < 0 {
				// Not encodable: EncodeRune will write utf8.RuneError instead.
				rl = len(string(utf8.RuneError))
			}
			if nbytes+rl > maxbytes {
				// Grow the buffer.
				maxbytes = maxbytes*2 + utf8.UTFMax
				nb := make([]byte, maxbytes)
				copy(nb, b[0:nbytes])
				b = nb
			}
			nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
		}
		i += wid
	}
	return b[0:nbytes]
}

// Repeat returns a new byte slice consisting of count copies of b.
//
// It panics if count is negative or if the result of (len(b) * count)
// overflows.
func Repeat(b []byte, count int) []byte {
	if count == 0 {
		return []byte{}
	}
	// Repeat cannot return an error, so invalid sizes must panic here with a
	// clear message rather than wrap around into a wrong-sized result.
	if count < 0 {
		panic("bytes: negative Repeat count")
	} else if len(b)*count/count != len(b) {
		panic("bytes: Repeat count causes overflow")
	}

	nb := make([]byte, len(b)*count)
	bp := copy(nb, b)
	for bp < len(nb) {
		// Double the filled prefix each round; copy truncates the last one.
		copy(nb[bp:], nb[:bp])
		bp *= 2
	}
	return nb
}

// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }

// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }

// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
408 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } 409 410 // ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their 411 // upper case, giving priority to the special casing rules. 412 func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { 413 return Map(func(r rune) rune { return _case.ToUpper(r) }, s) 414 } 415 416 // ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their 417 // lower case, giving priority to the special casing rules. 418 func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { 419 return Map(func(r rune) rune { return _case.ToLower(r) }, s) 420 } 421 422 // ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their 423 // title case, giving priority to the special casing rules. 424 func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { 425 return Map(func(r rune) rune { return _case.ToTitle(r) }, s) 426 } 427 428 // isSeparator reports whether the rune could mark a word boundary. 429 // TODO: update when package unicode captures more of the properties. 430 func isSeparator(r rune) bool { 431 // ASCII alphanumerics and underscore are not separators 432 if r <= 0x7F { 433 switch { 434 case '0' <= r && r <= '9': 435 return false 436 case 'a' <= r && r <= 'z': 437 return false 438 case 'A' <= r && r <= 'Z': 439 return false 440 case r == '_': 441 return false 442 } 443 return true 444 } 445 // Letters and digits are not separators 446 if unicode.IsLetter(r) || unicode.IsDigit(r) { 447 return false 448 } 449 // Otherwise, all we can do for now is treat spaces as separators. 450 return unicode.IsSpace(r) 451 } 452 453 // Title returns a copy of s with all Unicode letters that begin words 454 // mapped to their title case. 455 // 456 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. 
457 func Title(s []byte) []byte { 458 // Use a closure here to remember state. 459 // Hackish but effective. Depends on Map scanning in order and calling 460 // the closure once per rune. 461 prev := ' ' 462 return Map( 463 func(r rune) rune { 464 if isSeparator(prev) { 465 prev = r 466 return unicode.ToTitle(r) 467 } 468 prev = r 469 return r 470 }, 471 s) 472 } 473 474 // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded 475 // Unicode code points c that satisfy f(c). 476 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { 477 i := indexFunc(s, f, false) 478 if i == -1 { 479 return nil 480 } 481 return s[i:] 482 } 483 484 // TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 485 // encoded Unicode code points c that satisfy f(c). 486 func TrimRightFunc(s []byte, f func(r rune) bool) []byte { 487 i := lastIndexFunc(s, f, false) 488 if i >= 0 && s[i] >= utf8.RuneSelf { 489 _, wid := utf8.DecodeRune(s[i:]) 490 i += wid 491 } else { 492 i++ 493 } 494 return s[0:i] 495 } 496 497 // TrimFunc returns a subslice of s by slicing off all leading and trailing 498 // UTF-8-encoded Unicode code points c that satisfy f(c). 499 func TrimFunc(s []byte, f func(r rune) bool) []byte { 500 return TrimRightFunc(TrimLeftFunc(s, f), f) 501 } 502 503 // TrimPrefix returns s without the provided leading prefix string. 504 // If s doesn't start with prefix, s is returned unchanged. 505 func TrimPrefix(s, prefix []byte) []byte { 506 if HasPrefix(s, prefix) { 507 return s[len(prefix):] 508 } 509 return s 510 } 511 512 // TrimSuffix returns s without the provided trailing suffix string. 513 // If s doesn't end with suffix, s is returned unchanged. 514 func TrimSuffix(s, suffix []byte) []byte { 515 if HasSuffix(s, suffix) { 516 return s[:len(s)-len(suffix)] 517 } 518 return s 519 } 520 521 // IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. 
522 // It returns the byte index in s of the first Unicode 523 // code point satisfying f(c), or -1 if none do. 524 func IndexFunc(s []byte, f func(r rune) bool) int { 525 return indexFunc(s, f, true) 526 } 527 528 // LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. 529 // It returns the byte index in s of the last Unicode 530 // code point satisfying f(c), or -1 if none do. 531 func LastIndexFunc(s []byte, f func(r rune) bool) int { 532 return lastIndexFunc(s, f, true) 533 } 534 535 // indexFunc is the same as IndexFunc except that if 536 // truth==false, the sense of the predicate function is 537 // inverted. 538 func indexFunc(s []byte, f func(r rune) bool, truth bool) int { 539 start := 0 540 for start < len(s) { 541 wid := 1 542 r := rune(s[start]) 543 if r >= utf8.RuneSelf { 544 r, wid = utf8.DecodeRune(s[start:]) 545 } 546 if f(r) == truth { 547 return start 548 } 549 start += wid 550 } 551 return -1 552 } 553 554 // lastIndexFunc is the same as LastIndexFunc except that if 555 // truth==false, the sense of the predicate function is 556 // inverted. 557 func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int { 558 for i := len(s); i > 0; { 559 r, size := rune(s[i-1]), 1 560 if r >= utf8.RuneSelf { 561 r, size = utf8.DecodeLastRune(s[0:i]) 562 } 563 i -= size 564 if f(r) == truth { 565 return i 566 } 567 } 568 return -1 569 } 570 571 func makeCutsetFunc(cutset string) func(r rune) bool { 572 return func(r rune) bool { 573 for _, c := range cutset { 574 if c == r { 575 return true 576 } 577 } 578 return false 579 } 580 } 581 582 // Trim returns a subslice of s by slicing off all leading and 583 // trailing UTF-8-encoded Unicode code points contained in cutset. 584 func Trim(s []byte, cutset string) []byte { 585 return TrimFunc(s, makeCutsetFunc(cutset)) 586 } 587 588 // TrimLeft returns a subslice of s by slicing off all leading 589 // UTF-8-encoded Unicode code points contained in cutset. 
590 func TrimLeft(s []byte, cutset string) []byte { 591 return TrimLeftFunc(s, makeCutsetFunc(cutset)) 592 } 593 594 // TrimRight returns a subslice of s by slicing off all trailing 595 // UTF-8-encoded Unicode code points that are contained in cutset. 596 func TrimRight(s []byte, cutset string) []byte { 597 return TrimRightFunc(s, makeCutsetFunc(cutset)) 598 } 599 600 // TrimSpace returns a subslice of s by slicing off all leading and 601 // trailing white space, as defined by Unicode. 602 func TrimSpace(s []byte) []byte { 603 return TrimFunc(s, unicode.IsSpace) 604 } 605 606 // Runes returns a slice of runes (Unicode code points) equivalent to s. 607 func Runes(s []byte) []rune { 608 t := make([]rune, utf8.RuneCount(s)) 609 i := 0 610 for len(s) > 0 { 611 r, l := utf8.DecodeRune(s) 612 t[i] = r 613 i++ 614 s = s[l:] 615 } 616 return t 617 } 618 619 // Replace returns a copy of the slice s with the first n 620 // non-overlapping instances of old replaced by new. 621 // If old is empty, it matches at the beginning of the slice 622 // and after each UTF-8 sequence, yielding up to k+1 replacements 623 // for a k-rune slice. 624 // If n < 0, there is no limit on the number of replacements. 625 func Replace(s, old, new []byte, n int) []byte { 626 m := 0 627 if n != 0 { 628 // Compute number of replacements. 629 m = Count(s, old) 630 } 631 if m == 0 { 632 // Just return a copy. 633 return append([]byte(nil), s...) 634 } 635 if n < 0 || m < n { 636 n = m 637 } 638 639 // Apply replacements to buffer. 
640 t := make([]byte, len(s)+n*(len(new)-len(old))) 641 w := 0 642 start := 0 643 for i := 0; i < n; i++ { 644 j := start 645 if len(old) == 0 { 646 if i > 0 { 647 _, wid := utf8.DecodeRune(s[start:]) 648 j += wid 649 } 650 } else { 651 j += Index(s[start:], old) 652 } 653 w += copy(t[w:], s[start:j]) 654 w += copy(t[w:], new) 655 start = j + len(old) 656 } 657 w += copy(t[w:], s[start:]) 658 return t[0:w] 659 } 660 661 // EqualFold reports whether s and t, interpreted as UTF-8 strings, 662 // are equal under Unicode case-folding. 663 func EqualFold(s, t []byte) bool { 664 for len(s) != 0 && len(t) != 0 { 665 // Extract first rune from each. 666 var sr, tr rune 667 if s[0] < utf8.RuneSelf { 668 sr, s = rune(s[0]), s[1:] 669 } else { 670 r, size := utf8.DecodeRune(s) 671 sr, s = r, s[size:] 672 } 673 if t[0] < utf8.RuneSelf { 674 tr, t = rune(t[0]), t[1:] 675 } else { 676 r, size := utf8.DecodeRune(t) 677 tr, t = r, t[size:] 678 } 679 680 // If they match, keep going; if not, return false. 681 682 // Easy case. 683 if tr == sr { 684 continue 685 } 686 687 // Make sr < tr to simplify what follows. 688 if tr < sr { 689 tr, sr = sr, tr 690 } 691 // Fast check for ASCII. 692 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' { 693 // ASCII, and sr is upper case. tr must be lower case. 694 if tr == sr+'a'-'A' { 695 continue 696 } 697 return false 698 } 699 700 // General case. SimpleFold(x) returns the next equivalent rune > x 701 // or wraps around to smaller values. 702 r := unicode.SimpleFold(sr) 703 for r != sr && r < tr { 704 r = unicode.SimpleFold(r) 705 } 706 if r == tr { 707 continue 708 } 709 return false 710 } 711 712 // One string is empty. Are both? 713 return len(s) == len(t) 714 }