gopkg.in/rethinkdb/rethinkdb-go.v6@v6.2.2/encoding/fold.go (about)

     1  package encoding
     2  
     3  import (
     4  	"bytes"
     5  	"unicode/utf8"
     6  )
     7  
     8  const (
     9  	caseMask     = ^byte(0x20) // Mask to ignore case in ASCII.
    10  	kelvin       = '\u212a'
    11  	smallLongEss = '\u017f'
    12  )
    13  
    14  // foldFunc returns one of four different case folding equivalence
    15  // functions, from most general (and slow) to fastest:
    16  //
    17  // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
    18  // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
    19  // 3) asciiEqualFold, no special, but includes non-letters (including _)
    20  // 4) simpleLetterEqualFold, no specials, no non-letters.
    21  //
    22  // The letters S and K are special because they map to 3 runes, not just 2:
    23  //  * S maps to s and to U+017F 'ſ' Latin small letter long s
    24  //  * k maps to K and to U+212A 'K' Kelvin sign
    25  // See http://play.golang.org/p/tTxjOc0OGo
    26  //
    27  // The returned function is specialized for matching against s and
    28  // should only be given s. It's not curried for performance reasons.
    29  func foldFunc(s []byte) func(s, t []byte) bool {
    30  	nonLetter := false
    31  	special := false // special letter
    32  	for _, b := range s {
    33  		if b >= utf8.RuneSelf {
    34  			return bytes.EqualFold
    35  		}
    36  		upper := b & caseMask
    37  		if upper < 'A' || upper > 'Z' {
    38  			nonLetter = true
    39  		} else if upper == 'K' || upper == 'S' {
    40  			// See above for why these letters are special.
    41  			special = true
    42  		}
    43  	}
    44  	if special {
    45  		return equalFoldRight
    46  	}
    47  	if nonLetter {
    48  		return asciiEqualFold
    49  	}
    50  	return simpleLetterEqualFold
    51  }
    52  
    53  // equalFoldRight is a specialization of bytes.EqualFold when s is
    54  // known to be all ASCII (including punctuation), but contains an 's',
    55  // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
    56  // See comments on foldFunc.
    57  func equalFoldRight(s, t []byte) bool {
    58  	for _, sb := range s {
    59  		if len(t) == 0 {
    60  			return false
    61  		}
    62  		tb := t[0]
    63  		if tb < utf8.RuneSelf {
    64  			if sb != tb {
    65  				sbUpper := sb & caseMask
    66  				if 'A' <= sbUpper && sbUpper <= 'Z' {
    67  					if sbUpper != tb&caseMask {
    68  						return false
    69  					}
    70  				} else {
    71  					return false
    72  				}
    73  			}
    74  			t = t[1:]
    75  			continue
    76  		}
    77  		// sb is ASCII and t is not. t must be either kelvin
    78  		// sign or long s; sb must be s, S, k, or K.
    79  		tr, size := utf8.DecodeRune(t)
    80  		switch sb {
    81  		case 's', 'S':
    82  			if tr != smallLongEss {
    83  				return false
    84  			}
    85  		case 'k', 'K':
    86  			if tr != kelvin {
    87  				return false
    88  			}
    89  		default:
    90  			return false
    91  		}
    92  		t = t[size:]
    93  
    94  	}
    95  	if len(t) > 0 {
    96  		return false
    97  	}
    98  	return true
    99  }
   100  
   101  // asciiEqualFold is a specialization of bytes.EqualFold for use when
   102  // s is all ASCII (but may contain non-letters) and contains no
   103  // special-folding letters.
   104  // See comments on foldFunc.
   105  func asciiEqualFold(s, t []byte) bool {
   106  	if len(s) != len(t) {
   107  		return false
   108  	}
   109  	for i, sb := range s {
   110  		tb := t[i]
   111  		if sb == tb {
   112  			continue
   113  		}
   114  		if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
   115  			if sb&caseMask != tb&caseMask {
   116  				return false
   117  			}
   118  		} else {
   119  			return false
   120  		}
   121  	}
   122  	return true
   123  }
   124  
   125  // simpleLetterEqualFold is a specialization of bytes.EqualFold for
   126  // use when s is all ASCII letters (no underscores, etc) and also
   127  // doesn't contain 'k', 'K', 's', or 'S'.
   128  // See comments on foldFunc.
   129  func simpleLetterEqualFold(s, t []byte) bool {
   130  	if len(s) != len(t) {
   131  		return false
   132  	}
   133  	for i, b := range s {
   134  		if b&caseMask != t[i]&caseMask {
   135  			return false
   136  		}
   137  	}
   138  	return true
   139  }