github.com/liquid-dev/text@v0.3.3-liquid/cases/context.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cases
     6  
     7  import "github.com/liquid-dev/text/transform"
     8  
     9  // A context is used for iterating over source bytes, fetching case info and
    10  // writing to a destination buffer.
    11  //
    12  // Casing operations may need more than one rune of context to decide how a rune
    13  // should be cased. Casing implementations should call checkpoint on context
    14  // whenever it is known to be safe to return the runes processed so far.
    15  //
    16  // It is recommended for implementations to not allow for more than 30 case
    17  // ignorables as lookahead (analogous to the limit in norm) and to use state if
    18  // unbounded lookahead is needed for cased runes.
    19  type context struct {
    20  	dst, src []byte
    21  	atEOF    bool
    22  
    23  	pDst int // pDst points past the last written rune in dst.
    24  	pSrc int // pSrc points to the start of the currently scanned rune.
    25  
    26  	// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
    27  	nDst, nSrc int
    28  	err        error
    29  
    30  	sz   int  // size of current rune
    31  	info info // case information of currently scanned rune
    32  
    33  	// State preserved across calls to Transform.
    34  	isMidWord bool // false if next cased letter needs to be title-cased.
    35  }
    36  
    37  func (c *context) Reset() {
    38  	c.isMidWord = false
    39  }
    40  
    41  // ret returns the return values for the Transform method. It checks whether
    42  // there were insufficient bytes in src to complete and introduces an error
    43  // accordingly, if necessary.
    44  func (c *context) ret() (nDst, nSrc int, err error) {
    45  	if c.err != nil || c.nSrc == len(c.src) {
    46  		return c.nDst, c.nSrc, c.err
    47  	}
    48  	// This point is only reached by mappers if there was no short destination
    49  	// buffer. This means that the source buffer was exhausted and that c.sz was
    50  	// set to 0 by next.
    51  	if c.atEOF && c.pSrc == len(c.src) {
    52  		return c.pDst, c.pSrc, nil
    53  	}
    54  	return c.nDst, c.nSrc, transform.ErrShortSrc
    55  }
    56  
    57  // retSpan returns the return values for the Span method. It checks whether
    58  // there were insufficient bytes in src to complete and introduces an error
    59  // accordingly, if necessary.
    60  func (c *context) retSpan() (n int, err error) {
    61  	_, nSrc, err := c.ret()
    62  	return nSrc, err
    63  }
    64  
    65  // checkpoint sets the return value buffer points for Transform to the current
    66  // positions.
    67  func (c *context) checkpoint() {
    68  	if c.err == nil {
    69  		c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
    70  	}
    71  }
    72  
    73  // unreadRune causes the last rune read by next to be reread on the next
    74  // invocation of next. Only one unreadRune may be called after a call to next.
    75  func (c *context) unreadRune() {
    76  	c.sz = 0
    77  }
    78  
    79  func (c *context) next() bool {
    80  	c.pSrc += c.sz
    81  	if c.pSrc == len(c.src) || c.err != nil {
    82  		c.info, c.sz = 0, 0
    83  		return false
    84  	}
    85  	v, sz := trie.lookup(c.src[c.pSrc:])
    86  	c.info, c.sz = info(v), sz
    87  	if c.sz == 0 {
    88  		if c.atEOF {
    89  			// A zero size means we have an incomplete rune. If we are atEOF,
    90  			// this means it is an illegal rune, which we will consume one
    91  			// byte at a time.
    92  			c.sz = 1
    93  		} else {
    94  			c.err = transform.ErrShortSrc
    95  			return false
    96  		}
    97  	}
    98  	return true
    99  }
   100  
   101  // writeBytes adds bytes to dst.
   102  func (c *context) writeBytes(b []byte) bool {
   103  	if len(c.dst)-c.pDst < len(b) {
   104  		c.err = transform.ErrShortDst
   105  		return false
   106  	}
   107  	// This loop is faster than using copy.
   108  	for _, ch := range b {
   109  		c.dst[c.pDst] = ch
   110  		c.pDst++
   111  	}
   112  	return true
   113  }
   114  
   115  // writeString writes the given string to dst.
   116  func (c *context) writeString(s string) bool {
   117  	if len(c.dst)-c.pDst < len(s) {
   118  		c.err = transform.ErrShortDst
   119  		return false
   120  	}
   121  	// This loop is faster than using copy.
   122  	for i := 0; i < len(s); i++ {
   123  		c.dst[c.pDst] = s[i]
   124  		c.pDst++
   125  	}
   126  	return true
   127  }
   128  
   129  // copy writes the current rune to dst.
   130  func (c *context) copy() bool {
   131  	return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
   132  }
   133  
   134  // copyXOR copies the current rune to dst and modifies it by applying the XOR
   135  // pattern of the case info. It is the responsibility of the caller to ensure
   136  // that this is a rune with a XOR pattern defined.
   137  func (c *context) copyXOR() bool {
   138  	if !c.copy() {
   139  		return false
   140  	}
   141  	if c.info&xorIndexBit == 0 {
   142  		// Fast path for 6-bit XOR pattern, which covers most cases.
   143  		c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
   144  	} else {
   145  		// Interpret XOR bits as an index.
   146  		// TODO: test performance for unrolling this loop. Verify that we have
   147  		// at least two bytes and at most three.
   148  		idx := c.info >> xorShift
   149  		for p := c.pDst - 1; ; p-- {
   150  			c.dst[p] ^= xorData[idx]
   151  			idx--
   152  			if xorData[idx] == 0 {
   153  				break
   154  			}
   155  		}
   156  	}
   157  	return true
   158  }
   159  
   160  // hasPrefix returns true if src[pSrc:] starts with the given string.
   161  func (c *context) hasPrefix(s string) bool {
   162  	b := c.src[c.pSrc:]
   163  	if len(b) < len(s) {
   164  		return false
   165  	}
   166  	for i, c := range b[:len(s)] {
   167  		if c != s[i] {
   168  			return false
   169  		}
   170  	}
   171  	return true
   172  }
   173  
   174  // caseType returns an info with only the case bits, normalized to either
   175  // cLower, cUpper, cTitle or cUncased.
   176  func (c *context) caseType() info {
   177  	cm := c.info & 0x7
   178  	if cm < 4 {
   179  		return cm
   180  	}
   181  	if cm >= cXORCase {
   182  		// xor the last bit of the rune with the case type bits.
   183  		b := c.src[c.pSrc+c.sz-1]
   184  		return info(b&1) ^ cm&0x3
   185  	}
   186  	if cm == cIgnorableCased {
   187  		return cLower
   188  	}
   189  	return cUncased
   190  }
   191  
   192  // lower writes the lowercase version of the current rune to dst.
   193  func lower(c *context) bool {
   194  	ct := c.caseType()
   195  	if c.info&hasMappingMask == 0 || ct == cLower {
   196  		return c.copy()
   197  	}
   198  	if c.info&exceptionBit == 0 {
   199  		return c.copyXOR()
   200  	}
   201  	e := exceptions[c.info>>exceptionShift:]
   202  	offset := 2 + e[0]&lengthMask // size of header + fold string
   203  	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
   204  		return c.writeString(e[offset : offset+nLower])
   205  	}
   206  	return c.copy()
   207  }
   208  
   209  func isLower(c *context) bool {
   210  	ct := c.caseType()
   211  	if c.info&hasMappingMask == 0 || ct == cLower {
   212  		return true
   213  	}
   214  	if c.info&exceptionBit == 0 {
   215  		c.err = transform.ErrEndOfSpan
   216  		return false
   217  	}
   218  	e := exceptions[c.info>>exceptionShift:]
   219  	if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
   220  		c.err = transform.ErrEndOfSpan
   221  		return false
   222  	}
   223  	return true
   224  }
   225  
   226  // upper writes the uppercase version of the current rune to dst.
   227  func upper(c *context) bool {
   228  	ct := c.caseType()
   229  	if c.info&hasMappingMask == 0 || ct == cUpper {
   230  		return c.copy()
   231  	}
   232  	if c.info&exceptionBit == 0 {
   233  		return c.copyXOR()
   234  	}
   235  	e := exceptions[c.info>>exceptionShift:]
   236  	offset := 2 + e[0]&lengthMask // size of header + fold string
   237  	// Get length of first special case mapping.
   238  	n := (e[1] >> lengthBits) & lengthMask
   239  	if ct == cTitle {
   240  		// The first special case mapping is for lower. Set n to the second.
   241  		if n == noChange {
   242  			n = 0
   243  		}
   244  		n, e = e[1]&lengthMask, e[n:]
   245  	}
   246  	if n != noChange {
   247  		return c.writeString(e[offset : offset+n])
   248  	}
   249  	return c.copy()
   250  }
   251  
   252  // isUpper writes the isUppercase version of the current rune to dst.
   253  func isUpper(c *context) bool {
   254  	ct := c.caseType()
   255  	if c.info&hasMappingMask == 0 || ct == cUpper {
   256  		return true
   257  	}
   258  	if c.info&exceptionBit == 0 {
   259  		c.err = transform.ErrEndOfSpan
   260  		return false
   261  	}
   262  	e := exceptions[c.info>>exceptionShift:]
   263  	// Get length of first special case mapping.
   264  	n := (e[1] >> lengthBits) & lengthMask
   265  	if ct == cTitle {
   266  		n = e[1] & lengthMask
   267  	}
   268  	if n != noChange {
   269  		c.err = transform.ErrEndOfSpan
   270  		return false
   271  	}
   272  	return true
   273  }
   274  
   275  // title writes the title case version of the current rune to dst.
   276  func title(c *context) bool {
   277  	ct := c.caseType()
   278  	if c.info&hasMappingMask == 0 || ct == cTitle {
   279  		return c.copy()
   280  	}
   281  	if c.info&exceptionBit == 0 {
   282  		if ct == cLower {
   283  			return c.copyXOR()
   284  		}
   285  		return c.copy()
   286  	}
   287  	// Get the exception data.
   288  	e := exceptions[c.info>>exceptionShift:]
   289  	offset := 2 + e[0]&lengthMask // size of header + fold string
   290  
   291  	nFirst := (e[1] >> lengthBits) & lengthMask
   292  	if nTitle := e[1] & lengthMask; nTitle != noChange {
   293  		if nFirst != noChange {
   294  			e = e[nFirst:]
   295  		}
   296  		return c.writeString(e[offset : offset+nTitle])
   297  	}
   298  	if ct == cLower && nFirst != noChange {
   299  		// Use the uppercase version instead.
   300  		return c.writeString(e[offset : offset+nFirst])
   301  	}
   302  	// Already in correct case.
   303  	return c.copy()
   304  }
   305  
   306  // isTitle reports whether the current rune is in title case.
   307  func isTitle(c *context) bool {
   308  	ct := c.caseType()
   309  	if c.info&hasMappingMask == 0 || ct == cTitle {
   310  		return true
   311  	}
   312  	if c.info&exceptionBit == 0 {
   313  		if ct == cLower {
   314  			c.err = transform.ErrEndOfSpan
   315  			return false
   316  		}
   317  		return true
   318  	}
   319  	// Get the exception data.
   320  	e := exceptions[c.info>>exceptionShift:]
   321  	if nTitle := e[1] & lengthMask; nTitle != noChange {
   322  		c.err = transform.ErrEndOfSpan
   323  		return false
   324  	}
   325  	nFirst := (e[1] >> lengthBits) & lengthMask
   326  	if ct == cLower && nFirst != noChange {
   327  		c.err = transform.ErrEndOfSpan
   328  		return false
   329  	}
   330  	return true
   331  }
   332  
   333  // foldFull writes the foldFull version of the current rune to dst.
   334  func foldFull(c *context) bool {
   335  	if c.info&hasMappingMask == 0 {
   336  		return c.copy()
   337  	}
   338  	ct := c.caseType()
   339  	if c.info&exceptionBit == 0 {
   340  		if ct != cLower || c.info&inverseFoldBit != 0 {
   341  			return c.copyXOR()
   342  		}
   343  		return c.copy()
   344  	}
   345  	e := exceptions[c.info>>exceptionShift:]
   346  	n := e[0] & lengthMask
   347  	if n == 0 {
   348  		if ct == cLower {
   349  			return c.copy()
   350  		}
   351  		n = (e[1] >> lengthBits) & lengthMask
   352  	}
   353  	return c.writeString(e[2 : 2+n])
   354  }
   355  
   356  // isFoldFull reports whether the current run is mapped to foldFull
   357  func isFoldFull(c *context) bool {
   358  	if c.info&hasMappingMask == 0 {
   359  		return true
   360  	}
   361  	ct := c.caseType()
   362  	if c.info&exceptionBit == 0 {
   363  		if ct != cLower || c.info&inverseFoldBit != 0 {
   364  			c.err = transform.ErrEndOfSpan
   365  			return false
   366  		}
   367  		return true
   368  	}
   369  	e := exceptions[c.info>>exceptionShift:]
   370  	n := e[0] & lengthMask
   371  	if n == 0 && ct == cLower {
   372  		return true
   373  	}
   374  	c.err = transform.ErrEndOfSpan
   375  	return false
   376  }