github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/cases/context.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cases
     6  
     7  import (
     8  	"golang.org/x/text/transform"
     9  )
    10  
    11  // A context is used for iterating over source bytes, fetching case info and
    12  // writing to a destination buffer.
    13  //
    14  // Casing operations may need more than one rune of context to decide how a rune
    15  // should be cased. Casing implementations should call checkpoint on context
    16  // whenever it is known to be safe to return the runes processed so far.
    17  //
    18  // It is recommended for implementations to not allow for more than 30 case
    19  // ignorables as lookahead (analogous to the limit in norm) and to use state if
    20  // unbounded lookahead is needed for cased runes.
    21  type context struct {
    22  	dst, src []byte
    23  	atEOF    bool
    24  
    25  	pDst int // pDst points past the last written rune in dst.
    26  	pSrc int // pSrc points to the start of the currently scanned rune.
    27  
    28  	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
    29  	nDst, nSrc int
    30  	err        error
    31  
    32  	sz   int  // size of current rune
    33  	info info // case information of currently scanned rune
    34  
    35  	// State preserved across calls to Transform.
    36  	isMidWord bool // false if next cased letter needs to be title-cased.
    37  }
    38  
    39  func (c *context) Reset() {
    40  	c.isMidWord = false
    41  }
    42  
    43  // ret returns the return values for the Transform method. It checks whether
    44  // there were insufficient bytes in src to complete and introduces an error
    45  // accordingly, if necessary.
    46  func (c *context) ret() (nDst, nSrc int, err error) {
    47  	if c.err != nil || c.nSrc == len(c.src) {
    48  		return c.nDst, c.nSrc, c.err
    49  	}
    50  	// This point is only reached by mappers if there was no short destination
    51  	// buffer. This means that the source buffer was exhausted and that c.sz was
    52  	// set to 0 by next.
    53  	if c.atEOF && c.pSrc == len(c.src) {
    54  		return c.pDst, c.pSrc, nil
    55  	}
    56  	return c.nDst, c.nSrc, transform.ErrShortSrc
    57  }
    58  
    59  // checkpoint sets the return value buffer points for Transform to the current
    60  // positions.
    61  func (c *context) checkpoint() {
    62  	if c.err == nil {
    63  		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
    64  	}
    65  }
    66  
    67  // unreadRune causes the last rune read by next to be reread on the next
    68  // invocation of next. Only one unreadRune may be called after a call to next.
    69  func (c *context) unreadRune() {
    70  	c.sz = 0
    71  }
    72  
    73  func (c *context) next() bool {
    74  	c.pSrc += c.sz
    75  	if c.pSrc == len(c.src) || c.err != nil {
    76  		c.info, c.sz = 0, 0
    77  		return false
    78  	}
    79  	v, sz := trie.lookup(c.src[c.pSrc:])
    80  	c.info, c.sz = info(v), sz
    81  	if c.sz == 0 {
    82  		if c.atEOF {
    83  			// A zero size means we have an incomplete rune. If we are atEOF,
    84  			// this means it is an illegal rune, which we will consume one
    85  			// byte at a time.
    86  			c.sz = 1
    87  		} else {
    88  			c.err = transform.ErrShortSrc
    89  			return false
    90  		}
    91  	}
    92  	return true
    93  }
    94  
    95  // writeBytes adds bytes to dst.
    96  func (c *context) writeBytes(b []byte) bool {
    97  	if len(c.dst)-c.pDst < len(b) {
    98  		c.err = transform.ErrShortDst
    99  		return false
   100  	}
   101  	// This loop is faster than using copy.
   102  	for _, ch := range b {
   103  		c.dst[c.pDst] = ch
   104  		c.pDst++
   105  	}
   106  	return true
   107  }
   108  
   109  // writeString writes the given string to dst.
   110  func (c *context) writeString(s string) bool {
   111  	if len(c.dst)-c.pDst < len(s) {
   112  		c.err = transform.ErrShortDst
   113  		return false
   114  	}
   115  	// This loop is faster than using copy.
   116  	for i := 0; i < len(s); i++ {
   117  		c.dst[c.pDst] = s[i]
   118  		c.pDst++
   119  	}
   120  	return true
   121  }
   122  
   123  // copy writes the current rune to dst.
   124  func (c *context) copy() bool {
   125  	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
   126  }
   127  
   128  // copyXOR copies the current rune to dst and modifies it by applying the XOR
   129  // pattern of the case info. It is the responsibility of the caller to ensure
   130  // that this is a rune with a XOR pattern defined.
   131  func (c *context) copyXOR() bool {
   132  	if !c.copy() {
   133  		return false
   134  	}
   135  	if c.info&xorIndexBit == 0 {
   136  		// Fast path for 6-bit XOR pattern, which covers most cases.
   137  		c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
   138  	} else {
   139  		// Interpret XOR bits as an index.
   140  		// TODO: test performance for unrolling this loop. Verify that we have
   141  		// at least two bytes and at most three.
   142  		idx := c.info >> xorShift
   143  		for p := c.pDst - 1; ; p-- {
   144  			c.dst[p] ^= xorData[idx]
   145  			idx--
   146  			if xorData[idx] == 0 {
   147  				break
   148  			}
   149  		}
   150  	}
   151  	return true
   152  }
   153  
   154  // hasPrefix returns true if src[pSrc:] starts with the given string.
   155  func (c *context) hasPrefix(s string) bool {
   156  	b := c.src[c.pSrc:]
   157  	if len(b) < len(s) {
   158  		return false
   159  	}
   160  	for i, c := range b[:len(s)] {
   161  		if c != s[i] {
   162  			return false
   163  		}
   164  	}
   165  	return true
   166  }
   167  
   168  // caseType returns an info with only the case bits, normalized to either
   169  // cLower, cUpper, cTitle or cUncased.
   170  func (c *context) caseType() info {
   171  	cm := c.info & 0x7
   172  	if cm < 4 {
   173  		return cm
   174  	}
   175  	if cm >= cXORCase {
   176  		// xor the last bit of the rune with the case type bits.
   177  		b := c.src[c.pSrc+c.sz-1]
   178  		return info(b&1) ^ cm&0x3
   179  	}
   180  	if cm == cIgnorableCased {
   181  		return cLower
   182  	}
   183  	return cUncased
   184  }