github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/transform/transform.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package transform provides reader and writer wrappers that transform the
     6  // bytes passing through as well as various transformations. Example
     7  // transformations provided by other packages include normalization and
     8  // conversion between character sets.
     9  package transform // import "golang.org/x/text/transform"
    10  
    11  import (
    12  	"bytes"
    13  	"errors"
    14  	"io"
    15  	"unicode/utf8"
    16  )
    17  
var (
	// ErrShortDst means that the destination buffer was too short to
	// receive all of the transformed bytes. Within this package it is
	// handled by draining or growing the destination and calling
	// Transform again.
	ErrShortDst = errors.New("transform: short destination buffer")

	// ErrShortSrc means that the source buffer has insufficient data to
	// complete the transformation. Within this package it is handled by
	// supplying more source bytes, when available, and calling Transform
	// again.
	ErrShortSrc = errors.New("transform: short source buffer")

	// errInconsistentByteCount means that Transform returned success (nil
	// error) but also returned nSrc inconsistent with the src argument.
	// It is reported by Reader and Writer.
	errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")

	// errShortInternal means that an internal buffer is not large enough
	// to make progress and the Transform operation must be aborted. It is
	// reported by the Transformer returned by Chain.
	errShortInternal = errors.New("transform: short internal buffer")
)
    35  
// Transformer transforms bytes. A Transformer may keep state between calls to
// Transform; Reset returns it to its initial state.
type Transformer interface {
	// Transform writes to dst the transformed bytes read from src, and
	// returns the number of dst bytes written and src bytes read. The
	// atEOF argument tells whether src represents the last bytes of the
	// input.
	//
	// Callers should always process the nDst bytes produced and account
	// for the nSrc bytes consumed before considering the error err.
	//
	// A nil error means that all of the transformed bytes (whether freshly
	// transformed from src or left over from previous Transform calls)
	// were written to dst. A nil error can be returned regardless of
	// whether atEOF is true. If err is nil then nSrc must equal len(src);
	// the converse is not necessarily true.
	//
	// ErrShortDst means that dst was too short to receive all of the
	// transformed bytes. ErrShortSrc means that src had insufficient data
	// to complete the transformation. If both conditions apply, then
	// either error may be returned. Other than the error conditions listed
	// here, implementations are free to report other errors that arise.
	Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)

	// Reset resets the state and allows a Transformer to be reused.
	// NewReader, NewWriter and Bytes call Reset before using a
	// Transformer; String does so for non-empty input.
	Reset()
}
    62  
// NopResetter can be embedded by implementations of Transformer to add a nop
// Reset method. It is intended for Transformers that have no state to reset.
type NopResetter struct{}

// Reset implements the Reset method of the Transformer interface. It does
// nothing.
func (NopResetter) Reset() {}
    69  
// Reader wraps another io.Reader by transforming the bytes read.
type Reader struct {
	// r is the underlying source of untransformed bytes.
	r io.Reader
	// t is the Transformer applied to the bytes read from r.
	t Transformer
	// err holds the most recent error returned by r.Read. Once
	// transformComplete is set it holds the final error that Read reports
	// after all transformed bytes have been copied out.
	err error

	// dst[dst0:dst1] contains bytes that have been transformed by t but
	// not yet copied out via Read.
	dst        []byte
	dst0, dst1 int

	// src[src0:src1] contains bytes that have been read from r but not
	// yet transformed through t.
	src        []byte
	src0, src1 int

	// transformComplete is whether the transformation is complete,
	// regardless of whether or not it was successful.
	transformComplete bool
}
    90  
// defaultBufSize is the size in bytes of the src and dst buffers allocated by
// NewReader and NewWriter and of each intermediate buffer allocated by Chain.
const defaultBufSize = 4096
    92  
    93  // NewReader returns a new Reader that wraps r by transforming the bytes read
    94  // via t. It calls Reset on t.
    95  func NewReader(r io.Reader, t Transformer) *Reader {
    96  	t.Reset()
    97  	return &Reader{
    98  		r:   r,
    99  		t:   t,
   100  		dst: make([]byte, defaultBufSize),
   101  		src: make([]byte, defaultBufSize),
   102  	}
   103  }
   104  
// Read implements the io.Reader interface.
//
// Each call returns as soon as some transformed bytes can be copied into p,
// or once the transformation is complete and the final error can be reported.
// Until then it alternates between transforming buffered source bytes and
// reading more source bytes from the underlying reader.
func (r *Reader) Read(p []byte) (int, error) {
	n, err := 0, error(nil)
	for {
		// Copy out any transformed bytes and return the final error if we are done.
		if r.dst0 != r.dst1 {
			n = copy(p, r.dst[r.dst0:r.dst1])
			r.dst0 += n
			// The final error is only reported together with the last of
			// the transformed bytes.
			if r.dst0 == r.dst1 && r.transformComplete {
				return n, r.err
			}
			return n, nil
		} else if r.transformComplete {
			return 0, r.err
		}

		// Try to transform some source bytes, or to flush the transformer if we
		// are out of source bytes. We do this even if r.r.Read returned an error.
		// As the io.Reader documentation says, "process the n > 0 bytes returned
		// before considering the error".
		if r.src0 != r.src1 || r.err != nil {
			// dst is fully drained at this point, so it can be refilled
			// from the start. atEOF is only signalled for io.EOF, not for
			// other read errors.
			r.dst0 = 0
			r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF)
			r.src0 += n

			switch {
			case err == nil:
				// A nil error requires all of src to have been consumed.
				if r.src0 != r.src1 {
					r.err = errInconsistentByteCount
				}
				// The Transform call was successful; we are complete if we
				// cannot read more bytes into src.
				r.transformComplete = r.err != nil
				continue
			case err == ErrShortDst && (r.dst1 != 0 || n != 0):
				// Progress was made. Make room in dst by copying out, and
				// try again.
				continue
			case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
				// The source buffer is not yet full and the underlying
				// reader has not failed. Read more bytes into src via the
				// code below, and try again.
			default:
				r.transformComplete = true
				// The reader error (r.err) takes precedence over the
				// transformer error (err) unless r.err is nil or io.EOF.
				if r.err == nil || r.err == io.EOF {
					r.err = err
				}
				continue
			}
		}

		// Move any untransformed source bytes to the start of the buffer
		// and read more bytes.
		if r.src0 != 0 {
			r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
		}
		n, r.err = r.r.Read(r.src[r.src1:])
		r.src1 += n
	}
}
   164  
   165  // TODO: implement ReadByte (and ReadRune??).
   166  
// Writer wraps another io.Writer by transforming the bytes written to it
// before passing them on. The user needs to call Close to flush unwritten
// bytes that may be buffered.
type Writer struct {
	// w is the underlying destination of the transformed bytes.
	w io.Writer
	// t is the Transformer applied to the bytes passed to Write.
	t Transformer
	// dst is the scratch buffer that receives the output of each
	// Transform call before it is written to w.
	dst []byte

	// src[:n] contains bytes that have not yet passed through t.
	src []byte
	n   int
}
   179  
   180  // NewWriter returns a new Writer that wraps w by transforming the bytes written
   181  // via t. It calls Reset on t.
   182  func NewWriter(w io.Writer, t Transformer) *Writer {
   183  	t.Reset()
   184  	return &Writer{
   185  		w:   w,
   186  		t:   t,
   187  		dst: make([]byte, defaultBufSize),
   188  		src: make([]byte, defaultBufSize),
   189  	}
   190  }
   191  
   192  // Write implements the io.Writer interface. If there are not enough
   193  // bytes available to complete a Transform, the bytes will be buffered
   194  // for the next write. Call Close to convert the remaining bytes.
   195  func (w *Writer) Write(data []byte) (n int, err error) {
   196  	src := data
   197  	if w.n > 0 {
   198  		// Append bytes from data to the last remainder.
   199  		// TODO: limit the amount copied on first try.
   200  		n = copy(w.src[w.n:], data)
   201  		w.n += n
   202  		src = w.src[:w.n]
   203  	}
   204  	for {
   205  		nDst, nSrc, err := w.t.Transform(w.dst, src, false)
   206  		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
   207  			return n, werr
   208  		}
   209  		src = src[nSrc:]
   210  		if w.n > 0 && len(src) <= n {
   211  			// Enough bytes from w.src have been consumed. We make src point
   212  			// to data instead to reduce the copying.
   213  			w.n = 0
   214  			n -= len(src)
   215  			src = data[n:]
   216  			if n < len(data) && (err == nil || err == ErrShortSrc) {
   217  				continue
   218  			}
   219  		} else {
   220  			n += nSrc
   221  		}
   222  		switch {
   223  		case err == ErrShortDst && (nDst > 0 || nSrc > 0):
   224  		case err == ErrShortSrc && len(src) < len(w.src):
   225  			m := copy(w.src, src)
   226  			// If w.n > 0, bytes from data were already copied to w.src and n
   227  			// was already set to the number of bytes consumed.
   228  			if w.n == 0 {
   229  				n += m
   230  			}
   231  			w.n = m
   232  			return n, nil
   233  		case err == nil && w.n > 0:
   234  			return n, errInconsistentByteCount
   235  		default:
   236  			return n, err
   237  		}
   238  	}
   239  }
   240  
   241  // Close implements the io.Closer interface.
   242  func (w *Writer) Close() error {
   243  	for src := w.src[:w.n]; len(src) > 0; {
   244  		nDst, nSrc, err := w.t.Transform(w.dst, src, true)
   245  		if nDst == 0 {
   246  			return err
   247  		}
   248  		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
   249  			return werr
   250  		}
   251  		if err != ErrShortDst {
   252  			return err
   253  		}
   254  		src = src[nSrc:]
   255  	}
   256  	return nil
   257  }
   258  
   259  type nop struct{ NopResetter }
   260  
   261  func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   262  	n := copy(dst, src)
   263  	if n < len(src) {
   264  		err = ErrShortDst
   265  	}
   266  	return n, n, err
   267  }
   268  
   269  type discard struct{ NopResetter }
   270  
   271  func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   272  	return 0, len(src), nil
   273  }
   274  
var (
	// Discard is a Transformer for which all Transform calls succeed
	// by consuming all bytes and writing nothing.
	Discard Transformer = discard{}

	// Nop is a Transformer that copies src to dst. It returns ErrShortDst
	// when dst is too small to hold all of src.
	Nop Transformer = nop{}
)
   283  
// chain is a sequence of links. A chain with N Transformers has N+1 links and
// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
// buffers given to chain.Transform and the middle N-1 buffers are intermediate
// buffers owned by the chain. The i'th link transforms bytes from the i'th
// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
type chain struct {
	link []link
	// err is the fatal error recorded by fatalError for the link at index
	// errStart-1. It is returned by Transform once no more progress can be
	// made downstream.
	err error
	// errStart is the index at which the error occurred plus 1. Processing
	// starts at this level at the next call to Transform. As long as
	// errStart > 0, chain will not consume any more source bytes.
	errStart int
}
   298  
   299  func (c *chain) fatalError(errIndex int, err error) {
   300  	if i := errIndex + 1; i > c.errStart {
   301  		c.errStart = i
   302  		c.err = err
   303  	}
   304  }
   305  
// link is one stage of a chain: a Transformer together with the buffer it
// reads its input from. The last link of a chain has a nil Transformer and
// only provides the final destination buffer.
type link struct {
	t Transformer
	// b[p:n] holds the bytes to be transformed by t. b[n:] is the free
	// space into which the preceding link writes its output.
	b []byte
	p int
	n int
}
   313  
   314  func (l *link) src() []byte {
   315  	return l.b[l.p:l.n]
   316  }
   317  
   318  func (l *link) dst() []byte {
   319  	return l.b[l.n:]
   320  }
   321  
   322  // Chain returns a Transformer that applies t in sequence.
   323  func Chain(t ...Transformer) Transformer {
   324  	if len(t) == 0 {
   325  		return nop{}
   326  	}
   327  	c := &chain{link: make([]link, len(t)+1)}
   328  	for i, tt := range t {
   329  		c.link[i].t = tt
   330  	}
   331  	// Allocate intermediate buffers.
   332  	b := make([][defaultBufSize]byte, len(t)-1)
   333  	for i := range b {
   334  		c.link[i+1].b = b[i][:]
   335  	}
   336  	return c
   337  }
   338  
   339  // Reset resets the state of Chain. It calls Reset on all the Transformers.
   340  func (c *chain) Reset() {
   341  	for i, l := range c.link {
   342  		if l.t != nil {
   343  			l.t.Reset()
   344  		}
   345  		c.link[i].p, c.link[i].n = 0, 0
   346  	}
   347  }
   348  
// Transform applies the transformers of c in sequence. The given src and dst
// serve as the buffers of the first and last link; bytes travel between the
// Transformers through the intermediate buffers owned by the chain.
//
// It returns the number of bytes written to dst and consumed from src.
// ErrShortDst is returned when the last Transformer runs out of room in dst,
// ErrShortSrc when the first Transformer needs more source bytes, and any
// other error when one of the Transformers failed or an intermediate buffer
// was too small to make progress (errShortInternal).
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	// Set up src and dst in the chain.
	srcL := &c.link[0]
	dstL := &c.link[len(c.link)-1]
	srcL.b, srcL.p, srcL.n = src, 0, len(src)
	dstL.b, dstL.n = dst, 0
	var lastFull, needProgress bool // for detecting progress

	// i is the index of the next Transformer to apply, for i in [low, high].
	// low is the lowest index for which c.link[low] may still produce bytes.
	// high is the highest index for which c.link[high] has a Transformer.
	// The error returned by Transform determines whether to increase or
	// decrease i. We try to completely fill a buffer before converting it.
	for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; {
		in, out := &c.link[i], &c.link[i+1]
		// atEOF is only passed on to the lowest link that may still produce
		// bytes; links above it may still receive more input.
		nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i)
		out.n += nDst
		in.p += nSrc
		// Rewind a fully consumed intermediate buffer so that its whole
		// capacity is available again.
		if i > 0 && in.p == in.n {
			in.p, in.n = 0, 0
		}
		needProgress, lastFull = lastFull, false
		switch err0 {
		case ErrShortDst:
			// Process the destination buffer next. Return if we are already
			// at the high index.
			if i == high {
				return dstL.n, srcL.p, ErrShortDst
			}
			if out.n != 0 {
				i++
				// If the Transformer at the next index is not able to process any
				// source bytes there is nothing that can be done to make progress
				// and the bytes will remain unprocessed. lastFull is used to
				// detect this and break out of the loop with a fatal error.
				lastFull = true
				continue
			}
			// The destination buffer was too small, but is completely empty.
			// Return a fatal error as this transformation can never complete.
			c.fatalError(i, errShortInternal)
		case ErrShortSrc:
			if i == 0 {
				// Save ErrShortSrc in err. All other errors take precedence.
				err = ErrShortSrc
				break
			}
			// Source bytes were depleted before filling up the destination buffer.
			// Verify we made some progress, move the remaining bytes to the start
			// and try to get more source bytes.
			if needProgress && nSrc == 0 || in.n-in.p == len(in.b) {
				// There were not enough source bytes to proceed while the source
				// buffer cannot hold any more bytes. Return a fatal error as this
				// transformation can never complete.
				c.fatalError(i, errShortInternal)
				break
			}
			// in.b is an internal buffer and we can make progress.
			in.p, in.n = 0, copy(in.b, in.src())
			fallthrough
		case nil:
			// if i == low, we have depleted the bytes at index i or any lower levels.
			// In that case we increase low and i. In all other cases we decrease i to
			// fetch more bytes before proceeding to the next index.
			if i > low {
				i--
				continue
			}
		default:
			c.fatalError(i, err0)
		}
		// Exhausted level low or fatal error: increase low and continue
		// to process the bytes accepted so far.
		i++
		low = i
	}

	// If c.errStart > 0, this means we found a fatal error. We will clear
	// all upstream buffers. At this point, no more progress can be made
	// downstream, as Transform would have bailed while handling ErrShortDst.
	if c.errStart > 0 {
		for i := 1; i < c.errStart; i++ {
			c.link[i].p, c.link[i].n = 0, 0
		}
		// Report the recorded error and clear it for the next call.
		err, c.errStart, c.err = c.err, 0, nil
	}
	return dstL.n, srcL.p, err
}
   438  
   439  // RemoveFunc returns a Transformer that removes from the input all runes r for
   440  // which f(r) is true. Illegal bytes in the input are replaced by RuneError.
   441  func RemoveFunc(f func(r rune) bool) Transformer {
   442  	return removeF(f)
   443  }
   444  
// removeF is the Transformer returned by RemoveFunc. The function value itself
// is the predicate that selects the runes to remove.
type removeF func(r rune) bool

// Reset implements the Reset method of the Transformer interface. It does
// nothing.
func (removeF) Reset() {}
   448  
   449  // Transform implements the Transformer interface.
   450  func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
   451  	for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] {
   452  
   453  		if r = rune(src[0]); r < utf8.RuneSelf {
   454  			sz = 1
   455  		} else {
   456  			r, sz = utf8.DecodeRune(src)
   457  
   458  			if sz == 1 {
   459  				// Invalid rune.
   460  				if !atEOF && !utf8.FullRune(src) {
   461  					err = ErrShortSrc
   462  					break
   463  				}
   464  				// We replace illegal bytes with RuneError. Not doing so might
   465  				// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
   466  				// The resulting byte sequence may subsequently contain runes
   467  				// for which t(r) is true that were passed unnoticed.
   468  				if !t(r) {
   469  					if nDst+3 > len(dst) {
   470  						err = ErrShortDst
   471  						break
   472  					}
   473  					nDst += copy(dst[nDst:], "\uFFFD")
   474  				}
   475  				nSrc++
   476  				continue
   477  			}
   478  		}
   479  
   480  		if !t(r) {
   481  			if nDst+sz > len(dst) {
   482  				err = ErrShortDst
   483  				break
   484  			}
   485  			nDst += copy(dst[nDst:], src[:sz])
   486  		}
   487  		nSrc += sz
   488  	}
   489  	return
   490  }
   491  
   492  // grow returns a new []byte that is longer than b, and copies the first n bytes
   493  // of b to the start of the new slice.
   494  func grow(b []byte, n int) []byte {
   495  	m := len(b)
   496  	if m <= 256 {
   497  		m *= 2
   498  	} else {
   499  		m += m >> 1
   500  	}
   501  	buf := make([]byte, m)
   502  	copy(buf, b[:n])
   503  	return buf
   504  }
   505  
// initialBufSize is the size in bytes of each of the initial source and
// destination buffers used by String.
const initialBufSize = 128
   507  
// String returns a string with the result of converting s[:n] using t, where
// n <= len(s). If err == nil, n will be len(s). It calls Reset on t, unless s
// is empty, in which case it returns immediately without using t.
//
// As long as the output of t equals its input, String only tracks the length
// of the unchanged prefix. If the whole of s turns out to be unchanged, s
// itself is returned.
func String(t Transformer, s string) (result string, n int, err error) {
	if s == "" {
		return "", 0, nil
	}

	t.Reset()

	// Allocate only once. Note that both dst and src escape when passed to
	// Transform. The capacity of dst is capped so that it cannot extend into
	// the half of buf used by src.
	buf := [2 * initialBufSize]byte{}
	dst := buf[:initialBufSize:initialBufSize]
	src := buf[initialBufSize : 2*initialBufSize]

	// Avoid allocation if the transformed string is identical to the original.
	// After this loop, pDst will point to the furthest point in s for which it
	// could be detected that t gives equal results, src[:nSrc] will
	// indicate the last processed chunk of s for which the output is not equal
	// and dst[:nDst] will be the transform of this chunk.
	var nDst, nSrc int
	pDst := 0 // Used as index in both src and dst in this loop.
	for {
		n := copy(src, s[pDst:])
		nDst, nSrc, err = t.Transform(dst, src[:n], pDst+n == len(s))

		// Note 1: we will not enter the loop with pDst == len(s) and we will
		// not end the loop with it either. So if nSrc is 0, this means there is
		// some kind of error from which we cannot recover given the current
		// buffer sizes. We will give up in this case.
		// Note 2: it is not entirely correct to simply do a bytes.Equal as
		// a Transformer may buffer internally. It will work in most cases,
		// though, and no harm is done if it doesn't work.
		// TODO:  let transformers implement an optional Spanner interface, akin
		// to norm's QuickSpan. This would even allow us to avoid any allocation.
		if nSrc == 0 || !bytes.Equal(dst[:nDst], src[:nSrc]) {
			break
		}

		if pDst += nDst; pDst == len(s) {
			return s, pDst, nil
		}
	}

	// Move the bytes seen so far to dst. The transform of the last chunk goes
	// behind the unchanged prefix s[:pDst], either within the current buffer
	// or, if that is too small, in a newly allocated one.
	pSrc := pDst + nSrc
	if pDst+nDst <= initialBufSize {
		copy(dst[pDst:], dst[:nDst])
	} else {
		b := make([]byte, len(s)+nDst-nSrc)
		copy(b[pDst:], dst[:nDst])
		dst = b
	}
	copy(dst, s[:pDst])
	pDst += nDst

	// ErrShortDst and ErrShortSrc are handled by the loop below; any other
	// error ends the conversion with the bytes produced so far.
	if err != nil && err != ErrShortDst && err != ErrShortSrc {
		return string(dst[:pDst]), pSrc, err
	}

	// Complete the string with the remainder.
	for {
		n := copy(src, s[pSrc:])
		nDst, nSrc, err = t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
		pDst += nDst
		pSrc += nSrc

		switch err {
		case nil:
			if pSrc == len(s) {
				return string(dst[:pDst]), pSrc, nil
			}
		case ErrShortDst:
			// Do not grow as long as we can make progress. This may avoid
			// excessive allocations.
			if nDst == 0 {
				dst = grow(dst, pDst)
			}
		case ErrShortSrc:
			// Only enlarge the source window if nothing could be consumed
			// from the current one. Its contents need not be preserved as it
			// is refilled from s at the top of the loop.
			if nSrc == 0 {
				src = grow(src, 0)
			}
		default:
			return string(dst[:pDst]), pSrc, err
		}
	}
}
   595  
   596  // Bytes returns a new byte slice with the result of converting b[:n] using t,
   597  // where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.
   598  func Bytes(t Transformer, b []byte) (result []byte, n int, err error) {
   599  	t.Reset()
   600  	dst := make([]byte, len(b))
   601  	pDst, pSrc := 0, 0
   602  	for {
   603  		nDst, nSrc, err := t.Transform(dst[pDst:], b[pSrc:], true)
   604  		pDst += nDst
   605  		pSrc += nSrc
   606  		if err != ErrShortDst {
   607  			return dst[:pDst], pSrc, err
   608  		}
   609  
   610  		// Grow the destination buffer, but do not grow as long as we can make
   611  		// progress. This may avoid excessive allocations.
   612  		if nDst == 0 {
   613  			dst = grow(dst, pDst)
   614  		}
   615  	}
   616  }