github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/cases/context.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package cases 6 7 import ( 8 "golang.org/x/text/transform" 9 ) 10 11 // A context is used for iterating over source bytes, fetching case info and 12 // writing to a destination buffer. 13 // 14 // Casing operations may need more than one rune of context to decide how a rune 15 // should be cased. Casing implementations should call checkpoint on context 16 // whenever it is known to be safe to return the runes processed so far. 17 // 18 // It is recommended for implementations to not allow for more than 30 case 19 // ignorables as lookahead (analogous to the limit in norm) and to use state if 20 // unbounded lookahead is needed for cased runes. 21 type context struct { 22 dst, src []byte 23 atEOF bool 24 25 pDst int // pDst points past the last written rune in dst. 26 pSrc int // pSrc points to the start of the currently scanned rune. 27 28 // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc. 29 nDst, nSrc int 30 err error 31 32 sz int // size of current rune 33 info info // case information of currently scanned rune 34 35 // State preserved across calls to Transform. 36 isMidWord bool // false if next cased letter needs to be title-cased. 37 } 38 39 func (c *context) Reset() { 40 c.isMidWord = false 41 } 42 43 // ret returns the return values for the Transform method. It checks whether 44 // there were insufficient bytes in src to complete and introduces an error 45 // accordingly, if necessary. 46 func (c *context) ret() (nDst, nSrc int, err error) { 47 if c.err != nil || c.nSrc == len(c.src) { 48 return c.nDst, c.nSrc, c.err 49 } 50 // This point is only reached by mappers if there was no short destination 51 // buffer. This means that the source buffer was exhausted and that c.sz was 52 // set to 0 by next. 53 if c.atEOF && c.pSrc == len(c.src) { 54 return c.pDst, c.pSrc, nil 55 } 56 return c.nDst, c.nSrc, transform.ErrShortSrc 57 } 58 59 // checkpoint sets the return value buffer points for Transform to the current 60 // positions. 61 func (c *context) checkpoint() { 62 if c.err == nil { 63 c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz 64 } 65 } 66 67 // unreadRune causes the last rune read by next to be reread on the next 68 // invocation of next. Only one unreadRune may be called after a call to next. 69 func (c *context) unreadRune() { 70 c.sz = 0 71 } 72 73 func (c *context) next() bool { 74 c.pSrc += c.sz 75 if c.pSrc == len(c.src) || c.err != nil { 76 c.info, c.sz = 0, 0 77 return false 78 } 79 v, sz := trie.lookup(c.src[c.pSrc:]) 80 c.info, c.sz = info(v), sz 81 if c.sz == 0 { 82 if c.atEOF { 83 // A zero size means we have an incomplete rune. If we are atEOF, 84 // this means it is an illegal rune, which we will consume one 85 // byte at a time. 86 c.sz = 1 87 } else { 88 c.err = transform.ErrShortSrc 89 return false 90 } 91 } 92 return true 93 } 94 95 // writeBytes adds bytes to dst. 96 func (c *context) writeBytes(b []byte) bool { 97 if len(c.dst)-c.pDst < len(b) { 98 c.err = transform.ErrShortDst 99 return false 100 } 101 // This loop is faster than using copy. 102 for _, ch := range b { 103 c.dst[c.pDst] = ch 104 c.pDst++ 105 } 106 return true 107 } 108 109 // writeString writes the given string to dst. 110 func (c *context) writeString(s string) bool { 111 if len(c.dst)-c.pDst < len(s) { 112 c.err = transform.ErrShortDst 113 return false 114 } 115 // This loop is faster than using copy. 116 for i := 0; i < len(s); i++ { 117 c.dst[c.pDst] = s[i] 118 c.pDst++ 119 } 120 return true 121 } 122 123 // copy writes the current rune to dst. 124 func (c *context) copy() bool { 125 return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz]) 126 } 127 128 // copyXOR copies the current rune to dst and modifies it by applying the XOR 129 // pattern of the case info. It is the responsibility of the caller to ensure 130 // that this is a rune with a XOR pattern defined. 131 func (c *context) copyXOR() bool { 132 if !c.copy() { 133 return false 134 } 135 if c.info&xorIndexBit == 0 { 136 // Fast path for 6-bit XOR pattern, which covers most cases. 137 c.dst[c.pDst-1] ^= byte(c.info >> xorShift) 138 } else { 139 // Interpret XOR bits as an index. 140 // TODO: test performance for unrolling this loop. Verify that we have 141 // at least two bytes and at most three. 142 idx := c.info >> xorShift 143 for p := c.pDst - 1; ; p-- { 144 c.dst[p] ^= xorData[idx] 145 idx-- 146 if xorData[idx] == 0 { 147 break 148 } 149 } 150 } 151 return true 152 } 153 154 // hasPrefix returns true if src[pSrc:] starts with the given string. 155 func (c *context) hasPrefix(s string) bool { 156 b := c.src[c.pSrc:] 157 if len(b) < len(s) { 158 return false 159 } 160 for i, c := range b[:len(s)] { 161 if c != s[i] { 162 return false 163 } 164 } 165 return true 166 } 167 168 // caseType returns an info with only the case bits, normalized to either 169 // cLower, cUpper, cTitle or cUncased. 170 func (c *context) caseType() info { 171 cm := c.info & 0x7 172 if cm < 4 { 173 return cm 174 } 175 if cm >= cXORCase { 176 // xor the last bit of the rune with the case type bits. 177 b := c.src[c.pSrc+c.sz-1] 178 return info(b&1) ^ cm&0x3 179 } 180 if cm == cIgnorableCased { 181 return cLower 182 } 183 return cUncased 184 }