github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/transform/transform.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package transform provides reader and writer wrappers that transform the 6 // bytes passing through as well as various transformations. Example 7 // transformations provided by other packages include normalization and 8 // conversion between character sets. 9 package transform // import "golang.org/x/text/transform" 10 11 import ( 12 "bytes" 13 "errors" 14 "io" 15 "unicode/utf8" 16 ) 17 18 var ( 19 // ErrShortDst means that the destination buffer was too short to 20 // receive all of the transformed bytes. 21 ErrShortDst = errors.New("transform: short destination buffer") 22 23 // ErrShortSrc means that the source buffer has insufficient data to 24 // complete the transformation. 25 ErrShortSrc = errors.New("transform: short source buffer") 26 27 // errInconsistentByteCount means that Transform returned success (nil 28 // error) but also returned nSrc inconsistent with the src argument. 29 errInconsistentByteCount = errors.New("transform: inconsistent byte count returned") 30 31 // errShortInternal means that an internal buffer is not large enough 32 // to make progress and the Transform operation must be aborted. 33 errShortInternal = errors.New("transform: short internal buffer") 34 ) 35 36 // Transformer transforms bytes. 37 type Transformer interface { 38 // Transform writes to dst the transformed bytes read from src, and 39 // returns the number of dst bytes written and src bytes read. The 40 // atEOF argument tells whether src represents the last bytes of the 41 // input. 42 // 43 // Callers should always process the nDst bytes produced and account 44 // for the nSrc bytes consumed before considering the error err. 45 // 46 // A nil error means that all of the transformed bytes (whether freshly 47 // transformed from src or left over from previous Transform calls) 48 // were written to dst. A nil error can be returned regardless of 49 // whether atEOF is true. If err is nil then nSrc must equal len(src); 50 // the converse is not necessarily true. 51 // 52 // ErrShortDst means that dst was too short to receive all of the 53 // transformed bytes. ErrShortSrc means that src had insufficient data 54 // to complete the transformation. If both conditions apply, then 55 // either error may be returned. Other than the error conditions listed 56 // here, implementations are free to report other errors that arise. 57 Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) 58 59 // Reset resets the state and allows a Transformer to be reused. 60 Reset() 61 } 62 63 // NopResetter can be embedded by implementations of Transformer to add a nop 64 // Reset method. 65 type NopResetter struct{} 66 67 // Reset implements the Reset method of the Transformer interface. 68 func (NopResetter) Reset() {} 69 70 // Reader wraps another io.Reader by transforming the bytes read. 71 type Reader struct { 72 r io.Reader 73 t Transformer 74 err error 75 76 // dst[dst0:dst1] contains bytes that have been transformed by t but 77 // not yet copied out via Read. 78 dst []byte 79 dst0, dst1 int 80 81 // src[src0:src1] contains bytes that have been read from r but not 82 // yet transformed through t. 83 src []byte 84 src0, src1 int 85 86 // transformComplete is whether the transformation is complete, 87 // regardless of whether or not it was successful. 88 transformComplete bool 89 } 90 91 const defaultBufSize = 4096 92 93 // NewReader returns a new Reader that wraps r by transforming the bytes read 94 // via t. It calls Reset on t. 95 func NewReader(r io.Reader, t Transformer) *Reader { 96 t.Reset() 97 return &Reader{ 98 r: r, 99 t: t, 100 dst: make([]byte, defaultBufSize), 101 src: make([]byte, defaultBufSize), 102 } 103 } 104 105 // Read implements the io.Reader interface. 106 func (r *Reader) Read(p []byte) (int, error) { 107 n, err := 0, error(nil) 108 for { 109 // Copy out any transformed bytes and return the final error if we are done. 110 if r.dst0 != r.dst1 { 111 n = copy(p, r.dst[r.dst0:r.dst1]) 112 r.dst0 += n 113 if r.dst0 == r.dst1 && r.transformComplete { 114 return n, r.err 115 } 116 return n, nil 117 } else if r.transformComplete { 118 return 0, r.err 119 } 120 121 // Try to transform some source bytes, or to flush the transformer if we 122 // are out of source bytes. We do this even if r.r.Read returned an error. 123 // As the io.Reader documentation says, "process the n > 0 bytes returned 124 // before considering the error". 125 if r.src0 != r.src1 || r.err != nil { 126 r.dst0 = 0 127 r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF) 128 r.src0 += n 129 130 switch { 131 case err == nil: 132 if r.src0 != r.src1 { 133 r.err = errInconsistentByteCount 134 } 135 // The Transform call was successful; we are complete if we 136 // cannot read more bytes into src. 137 r.transformComplete = r.err != nil 138 continue 139 case err == ErrShortDst && (r.dst1 != 0 || n != 0): 140 // Make room in dst by copying out, and try again. 141 continue 142 case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil: 143 // Read more bytes into src via the code below, and try again. 144 default: 145 r.transformComplete = true 146 // The reader error (r.err) takes precedence over the 147 // transformer error (err) unless r.err is nil or io.EOF. 148 if r.err == nil || r.err == io.EOF { 149 r.err = err 150 } 151 continue 152 } 153 } 154 155 // Move any untransformed source bytes to the start of the buffer 156 // and read more bytes. 157 if r.src0 != 0 { 158 r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1]) 159 } 160 n, r.err = r.r.Read(r.src[r.src1:]) 161 r.src1 += n 162 } 163 } 164 165 // TODO: implement ReadByte (and ReadRune??). 166 167 // Writer wraps another io.Writer by transforming the bytes read. 168 // The user needs to call Close to flush unwritten bytes that may 169 // be buffered. 170 type Writer struct { 171 w io.Writer 172 t Transformer 173 dst []byte 174 175 // src[:n] contains bytes that have not yet passed through t. 176 src []byte 177 n int 178 } 179 180 // NewWriter returns a new Writer that wraps w by transforming the bytes written 181 // via t. It calls Reset on t. 182 func NewWriter(w io.Writer, t Transformer) *Writer { 183 t.Reset() 184 return &Writer{ 185 w: w, 186 t: t, 187 dst: make([]byte, defaultBufSize), 188 src: make([]byte, defaultBufSize), 189 } 190 } 191 192 // Write implements the io.Writer interface. If there are not enough 193 // bytes available to complete a Transform, the bytes will be buffered 194 // for the next write. Call Close to convert the remaining bytes. 195 func (w *Writer) Write(data []byte) (n int, err error) { 196 src := data 197 if w.n > 0 { 198 // Append bytes from data to the last remainder. 199 // TODO: limit the amount copied on first try. 200 n = copy(w.src[w.n:], data) 201 w.n += n 202 src = w.src[:w.n] 203 } 204 for { 205 nDst, nSrc, err := w.t.Transform(w.dst, src, false) 206 if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { 207 return n, werr 208 } 209 src = src[nSrc:] 210 if w.n > 0 && len(src) <= n { 211 // Enough bytes from w.src have been consumed. We make src point 212 // to data instead to reduce the copying. 213 w.n = 0 214 n -= len(src) 215 src = data[n:] 216 if n < len(data) && (err == nil || err == ErrShortSrc) { 217 continue 218 } 219 } else { 220 n += nSrc 221 } 222 switch { 223 case err == ErrShortDst && (nDst > 0 || nSrc > 0): 224 case err == ErrShortSrc && len(src) < len(w.src): 225 m := copy(w.src, src) 226 // If w.n > 0, bytes from data were already copied to w.src and n 227 // was already set to the number of bytes consumed. 228 if w.n == 0 { 229 n += m 230 } 231 w.n = m 232 return n, nil 233 case err == nil && w.n > 0: 234 return n, errInconsistentByteCount 235 default: 236 return n, err 237 } 238 } 239 } 240 241 // Close implements the io.Closer interface. 242 func (w *Writer) Close() error { 243 for src := w.src[:w.n]; len(src) > 0; { 244 nDst, nSrc, err := w.t.Transform(w.dst, src, true) 245 if nDst == 0 { 246 return err 247 } 248 if _, werr := w.w.Write(w.dst[:nDst]); werr != nil { 249 return werr 250 } 251 if err != ErrShortDst { 252 return err 253 } 254 src = src[nSrc:] 255 } 256 return nil 257 } 258 259 type nop struct{ NopResetter } 260 261 func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 262 n := copy(dst, src) 263 if n < len(src) { 264 err = ErrShortDst 265 } 266 return n, n, err 267 } 268 269 type discard struct{ NopResetter } 270 271 func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 272 return 0, len(src), nil 273 } 274 275 var ( 276 // Discard is a Transformer for which all Transform calls succeed 277 // by consuming all bytes and writing nothing. 278 Discard Transformer = discard{} 279 280 // Nop is a Transformer that copies src to dst. 281 Nop Transformer = nop{} 282 ) 283 284 // chain is a sequence of links. A chain with N Transformers has N+1 links and 285 // N+1 buffers. Of those N+1 buffers, the first and last are the src and dst 286 // buffers given to chain.Transform and the middle N-1 buffers are intermediate 287 // buffers owned by the chain. The i'th link transforms bytes from the i'th 288 // buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer 289 // chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N). 290 type chain struct { 291 link []link 292 err error 293 // errStart is the index at which the error occurred plus 1. Processing 294 // errStart at this level at the next call to Transform. As long as 295 // errStart > 0, chain will not consume any more source bytes. 296 errStart int 297 } 298 299 func (c *chain) fatalError(errIndex int, err error) { 300 if i := errIndex + 1; i > c.errStart { 301 c.errStart = i 302 c.err = err 303 } 304 } 305 306 type link struct { 307 t Transformer 308 // b[p:n] holds the bytes to be transformed by t. 309 b []byte 310 p int 311 n int 312 } 313 314 func (l *link) src() []byte { 315 return l.b[l.p:l.n] 316 } 317 318 func (l *link) dst() []byte { 319 return l.b[l.n:] 320 } 321 322 // Chain returns a Transformer that applies t in sequence. 323 func Chain(t ...Transformer) Transformer { 324 if len(t) == 0 { 325 return nop{} 326 } 327 c := &chain{link: make([]link, len(t)+1)} 328 for i, tt := range t { 329 c.link[i].t = tt 330 } 331 // Allocate intermediate buffers. 332 b := make([][defaultBufSize]byte, len(t)-1) 333 for i := range b { 334 c.link[i+1].b = b[i][:] 335 } 336 return c 337 } 338 339 // Reset resets the state of Chain. It calls Reset on all the Transformers. 340 func (c *chain) Reset() { 341 for i, l := range c.link { 342 if l.t != nil { 343 l.t.Reset() 344 } 345 c.link[i].p, c.link[i].n = 0, 0 346 } 347 } 348 349 // Transform applies the transformers of c in sequence. 350 func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 351 // Set up src and dst in the chain. 352 srcL := &c.link[0] 353 dstL := &c.link[len(c.link)-1] 354 srcL.b, srcL.p, srcL.n = src, 0, len(src) 355 dstL.b, dstL.n = dst, 0 356 var lastFull, needProgress bool // for detecting progress 357 358 // i is the index of the next Transformer to apply, for i in [low, high]. 359 // low is the lowest index for which c.link[low] may still produce bytes. 360 // high is the highest index for which c.link[high] has a Transformer. 361 // The error returned by Transform determines whether to increase or 362 // decrease i. We try to completely fill a buffer before converting it. 363 for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; { 364 in, out := &c.link[i], &c.link[i+1] 365 nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i) 366 out.n += nDst 367 in.p += nSrc 368 if i > 0 && in.p == in.n { 369 in.p, in.n = 0, 0 370 } 371 needProgress, lastFull = lastFull, false 372 switch err0 { 373 case ErrShortDst: 374 // Process the destination buffer next. Return if we are already 375 // at the high index. 376 if i == high { 377 return dstL.n, srcL.p, ErrShortDst 378 } 379 if out.n != 0 { 380 i++ 381 // If the Transformer at the next index is not able to process any 382 // source bytes there is nothing that can be done to make progress 383 // and the bytes will remain unprocessed. lastFull is used to 384 // detect this and break out of the loop with a fatal error. 385 lastFull = true 386 continue 387 } 388 // The destination buffer was too small, but is completely empty. 389 // Return a fatal error as this transformation can never complete. 390 c.fatalError(i, errShortInternal) 391 case ErrShortSrc: 392 if i == 0 { 393 // Save ErrShortSrc in err. All other errors take precedence. 394 err = ErrShortSrc 395 break 396 } 397 // Source bytes were depleted before filling up the destination buffer. 398 // Verify we made some progress, move the remaining bytes to the errStart 399 // and try to get more source bytes. 400 if needProgress && nSrc == 0 || in.n-in.p == len(in.b) { 401 // There were not enough source bytes to proceed while the source 402 // buffer cannot hold any more bytes. Return a fatal error as this 403 // transformation can never complete. 404 c.fatalError(i, errShortInternal) 405 break 406 } 407 // in.b is an internal buffer and we can make progress. 408 in.p, in.n = 0, copy(in.b, in.src()) 409 fallthrough 410 case nil: 411 // if i == low, we have depleted the bytes at index i or any lower levels. 412 // In that case we increase low and i. In all other cases we decrease i to 413 // fetch more bytes before proceeding to the next index. 414 if i > low { 415 i-- 416 continue 417 } 418 default: 419 c.fatalError(i, err0) 420 } 421 // Exhausted level low or fatal error: increase low and continue 422 // to process the bytes accepted so far. 423 i++ 424 low = i 425 } 426 427 // If c.errStart > 0, this means we found a fatal error. We will clear 428 // all upstream buffers. At this point, no more progress can be made 429 // downstream, as Transform would have bailed while handling ErrShortDst. 430 if c.errStart > 0 { 431 for i := 1; i < c.errStart; i++ { 432 c.link[i].p, c.link[i].n = 0, 0 433 } 434 err, c.errStart, c.err = c.err, 0, nil 435 } 436 return dstL.n, srcL.p, err 437 } 438 439 // RemoveFunc returns a Transformer that removes from the input all runes r for 440 // which f(r) is true. Illegal bytes in the input are replaced by RuneError. 441 func RemoveFunc(f func(r rune) bool) Transformer { 442 return removeF(f) 443 } 444 445 type removeF func(r rune) bool 446 447 func (removeF) Reset() {} 448 449 // Transform implements the Transformer interface. 450 func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 451 for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] { 452 453 if r = rune(src[0]); r < utf8.RuneSelf { 454 sz = 1 455 } else { 456 r, sz = utf8.DecodeRune(src) 457 458 if sz == 1 { 459 // Invalid rune. 460 if !atEOF && !utf8.FullRune(src) { 461 err = ErrShortSrc 462 break 463 } 464 // We replace illegal bytes with RuneError. Not doing so might 465 // otherwise turn a sequence of invalid UTF-8 into valid UTF-8. 466 // The resulting byte sequence may subsequently contain runes 467 // for which t(r) is true that were passed unnoticed. 468 if !t(r) { 469 if nDst+3 > len(dst) { 470 err = ErrShortDst 471 break 472 } 473 nDst += copy(dst[nDst:], "\uFFFD") 474 } 475 nSrc++ 476 continue 477 } 478 } 479 480 if !t(r) { 481 if nDst+sz > len(dst) { 482 err = ErrShortDst 483 break 484 } 485 nDst += copy(dst[nDst:], src[:sz]) 486 } 487 nSrc += sz 488 } 489 return 490 } 491 492 // grow returns a new []byte that is longer than b, and copies the first n bytes 493 // of b to the start of the new slice. 494 func grow(b []byte, n int) []byte { 495 m := len(b) 496 if m <= 256 { 497 m *= 2 498 } else { 499 m += m >> 1 500 } 501 buf := make([]byte, m) 502 copy(buf, b[:n]) 503 return buf 504 } 505 506 const initialBufSize = 128 507 508 // String returns a string with the result of converting s[:n] using t, where 509 // n <= len(s). If err == nil, n will be len(s). It calls Reset on t. 510 func String(t Transformer, s string) (result string, n int, err error) { 511 if s == "" { 512 return "", 0, nil 513 } 514 515 t.Reset() 516 517 // Allocate only once. Note that both dst and src escape when passed to 518 // Transform. 519 buf := [2 * initialBufSize]byte{} 520 dst := buf[:initialBufSize:initialBufSize] 521 src := buf[initialBufSize : 2*initialBufSize] 522 523 // Avoid allocation if the transformed string is identical to the original. 524 // After this loop, pDst will point to the furthest point in s for which it 525 // could be detected that t gives equal results, src[:nSrc] will 526 // indicated the last processed chunk of s for which the output is not equal 527 // and dst[:nDst] will be the transform of this chunk. 528 var nDst, nSrc int 529 pDst := 0 // Used as index in both src and dst in this loop. 530 for { 531 n := copy(src, s[pDst:]) 532 nDst, nSrc, err = t.Transform(dst, src[:n], pDst+n == len(s)) 533 534 // Note 1: we will not enter the loop with pDst == len(s) and we will 535 // not end the loop with it either. So if nSrc is 0, this means there is 536 // some kind of error from which we cannot recover given the current 537 // buffer sizes. We will give up in this case. 538 // Note 2: it is not entirely correct to simply do a bytes.Equal as 539 // a Transformer may buffer internally. It will work in most cases, 540 // though, and no harm is done if it doesn't work. 541 // TODO: let transformers implement an optional Spanner interface, akin 542 // to norm's QuickSpan. This would even allow us to avoid any allocation. 543 if nSrc == 0 || !bytes.Equal(dst[:nDst], src[:nSrc]) { 544 break 545 } 546 547 if pDst += nDst; pDst == len(s) { 548 return s, pDst, nil 549 } 550 } 551 552 // Move the bytes seen so far to dst. 553 pSrc := pDst + nSrc 554 if pDst+nDst <= initialBufSize { 555 copy(dst[pDst:], dst[:nDst]) 556 } else { 557 b := make([]byte, len(s)+nDst-nSrc) 558 copy(b[pDst:], dst[:nDst]) 559 dst = b 560 } 561 copy(dst, s[:pDst]) 562 pDst += nDst 563 564 if err != nil && err != ErrShortDst && err != ErrShortSrc { 565 return string(dst[:pDst]), pSrc, err 566 } 567 568 // Complete the string with the remainder. 569 for { 570 n := copy(src, s[pSrc:]) 571 nDst, nSrc, err = t.Transform(dst[pDst:], src[:n], pSrc+n == len(s)) 572 pDst += nDst 573 pSrc += nSrc 574 575 switch err { 576 case nil: 577 if pSrc == len(s) { 578 return string(dst[:pDst]), pSrc, nil 579 } 580 case ErrShortDst: 581 // Do not grow as long as we can make progress. This may avoid 582 // excessive allocations. 583 if nDst == 0 { 584 dst = grow(dst, pDst) 585 } 586 case ErrShortSrc: 587 if nSrc == 0 { 588 src = grow(src, 0) 589 } 590 default: 591 return string(dst[:pDst]), pSrc, err 592 } 593 } 594 } 595 596 // Bytes returns a new byte slice with the result of converting b[:n] using t, 597 // where n <= len(b). If err == nil, n will be len(b). It calls Reset on t. 598 func Bytes(t Transformer, b []byte) (result []byte, n int, err error) { 599 t.Reset() 600 dst := make([]byte, len(b)) 601 pDst, pSrc := 0, 0 602 for { 603 nDst, nSrc, err := t.Transform(dst[pDst:], b[pSrc:], true) 604 pDst += nDst 605 pSrc += nSrc 606 if err != ErrShortDst { 607 return dst[:pDst], pSrc, err 608 } 609 610 // Grow the destination buffer, but do not grow as long as we can make 611 // progress. This may avoid excessive allocations. 612 if nDst == 0 { 613 dst = grow(dst, pDst) 614 } 615 } 616 }