gopkg.in/rethinkdb/rethinkdb-go.v6@v6.2.2/encoding/fold.go (about) 1 package encoding 2 3 import ( 4 "bytes" 5 "unicode/utf8" 6 ) 7 8 const ( 9 caseMask = ^byte(0x20) // Mask to ignore case in ASCII. 10 kelvin = '\u212a' 11 smallLongEss = '\u017f' 12 ) 13 14 // foldFunc returns one of four different case folding equivalence 15 // functions, from most general (and slow) to fastest: 16 // 17 // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8 18 // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S') 19 // 3) asciiEqualFold, no special, but includes non-letters (including _) 20 // 4) simpleLetterEqualFold, no specials, no non-letters. 21 // 22 // The letters S and K are special because they map to 3 runes, not just 2: 23 // * S maps to s and to U+017F 'ſ' Latin small letter long s 24 // * k maps to K and to U+212A 'K' Kelvin sign 25 // See http://play.golang.org/p/tTxjOc0OGo 26 // 27 // The returned function is specialized for matching against s and 28 // should only be given s. It's not curried for performance reasons. 29 func foldFunc(s []byte) func(s, t []byte) bool { 30 nonLetter := false 31 special := false // special letter 32 for _, b := range s { 33 if b >= utf8.RuneSelf { 34 return bytes.EqualFold 35 } 36 upper := b & caseMask 37 if upper < 'A' || upper > 'Z' { 38 nonLetter = true 39 } else if upper == 'K' || upper == 'S' { 40 // See above for why these letters are special. 41 special = true 42 } 43 } 44 if special { 45 return equalFoldRight 46 } 47 if nonLetter { 48 return asciiEqualFold 49 } 50 return simpleLetterEqualFold 51 } 52 53 // equalFoldRight is a specialization of bytes.EqualFold when s is 54 // known to be all ASCII (including punctuation), but contains an 's', 55 // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t. 56 // See comments on foldFunc. 57 func equalFoldRight(s, t []byte) bool { 58 for _, sb := range s { 59 if len(t) == 0 { 60 return false 61 } 62 tb := t[0] 63 if tb < utf8.RuneSelf { 64 if sb != tb { 65 sbUpper := sb & caseMask 66 if 'A' <= sbUpper && sbUpper <= 'Z' { 67 if sbUpper != tb&caseMask { 68 return false 69 } 70 } else { 71 return false 72 } 73 } 74 t = t[1:] 75 continue 76 } 77 // sb is ASCII and t is not. t must be either kelvin 78 // sign or long s; sb must be s, S, k, or K. 79 tr, size := utf8.DecodeRune(t) 80 switch sb { 81 case 's', 'S': 82 if tr != smallLongEss { 83 return false 84 } 85 case 'k', 'K': 86 if tr != kelvin { 87 return false 88 } 89 default: 90 return false 91 } 92 t = t[size:] 93 94 } 95 if len(t) > 0 { 96 return false 97 } 98 return true 99 } 100 101 // asciiEqualFold is a specialization of bytes.EqualFold for use when 102 // s is all ASCII (but may contain non-letters) and contains no 103 // special-folding letters. 104 // See comments on foldFunc. 105 func asciiEqualFold(s, t []byte) bool { 106 if len(s) != len(t) { 107 return false 108 } 109 for i, sb := range s { 110 tb := t[i] 111 if sb == tb { 112 continue 113 } 114 if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') { 115 if sb&caseMask != tb&caseMask { 116 return false 117 } 118 } else { 119 return false 120 } 121 } 122 return true 123 } 124 125 // simpleLetterEqualFold is a specialization of bytes.EqualFold for 126 // use when s is all ASCII letters (no underscores, etc) and also 127 // doesn't contain 'k', 'K', 's', or 'S'. 128 // See comments on foldFunc. 129 func simpleLetterEqualFold(s, t []byte) bool { 130 if len(s) != len(t) { 131 return false 132 } 133 for i, b := range s { 134 if b&caseMask != t[i]&caseMask { 135 return false 136 } 137 } 138 return true 139 }