github.com/thiagoyeds/go-cloud@v0.26.0/docstore/internal/fields/fold.go (about) 1 // Copyright 2019 The Go Cloud Development Kit Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fields 16 17 // This file was copied from https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/fold.go. 18 // Only the license and package were changed. 19 20 import ( 21 "bytes" 22 "unicode/utf8" 23 ) 24 25 const ( 26 caseMask = ^byte(0x20) // Mask to ignore case in ASCII. 27 kelvin = '\u212a' 28 smallLongEss = '\u017f' 29 ) 30 31 // foldFunc returns one of four different case folding equivalence 32 // functions, from most general (and slow) to fastest: 33 // 34 // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8 35 // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S') 36 // 3) asciiEqualFold, no special, but includes non-letters (including _) 37 // 4) simpleLetterEqualFold, no specials, no non-letters. 38 // 39 // The letters S and K are special because they map to 3 runes, not just 2: 40 // * S maps to s and to U+017F 'ſ' Latin small letter long s 41 // * k maps to K and to U+212A 'K' Kelvin sign 42 // See https://play.golang.org/p/tTxjOc0OGo 43 // 44 // The returned function is specialized for matching against s and 45 // should only be given s. It's not curried for performance reasons. 46 func foldFunc(s []byte) func(s, t []byte) bool { 47 nonLetter := false 48 special := false // special letter 49 for _, b := range s { 50 if b >= utf8.RuneSelf { 51 return bytes.EqualFold 52 } 53 upper := b & caseMask 54 if upper < 'A' || upper > 'Z' { 55 nonLetter = true 56 } else if upper == 'K' || upper == 'S' { 57 // See above for why these letters are special. 58 special = true 59 } 60 } 61 if special { 62 return equalFoldRight 63 } 64 if nonLetter { 65 return asciiEqualFold 66 } 67 return simpleLetterEqualFold 68 } 69 70 // equalFoldRight is a specialization of bytes.EqualFold when s is 71 // known to be all ASCII (including punctuation), but contains an 's', 72 // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t. 73 // See comments on foldFunc. 74 func equalFoldRight(s, t []byte) bool { 75 for _, sb := range s { 76 if len(t) == 0 { 77 return false 78 } 79 tb := t[0] 80 if tb < utf8.RuneSelf { 81 if sb != tb { 82 sbUpper := sb & caseMask 83 if 'A' <= sbUpper && sbUpper <= 'Z' { 84 if sbUpper != tb&caseMask { 85 return false 86 } 87 } else { 88 return false 89 } 90 } 91 t = t[1:] 92 continue 93 } 94 // sb is ASCII and t is not. t must be either kelvin 95 // sign or long s; sb must be s, S, k, or K. 96 tr, size := utf8.DecodeRune(t) 97 switch sb { 98 case 's', 'S': 99 if tr != smallLongEss { 100 return false 101 } 102 case 'k', 'K': 103 if tr != kelvin { 104 return false 105 } 106 default: 107 return false 108 } 109 t = t[size:] 110 111 } 112 if len(t) > 0 { 113 return false 114 } 115 return true 116 } 117 118 // asciiEqualFold is a specialization of bytes.EqualFold for use when 119 // s is all ASCII (but may contain non-letters) and contains no 120 // special-folding letters. 121 // See comments on foldFunc. 122 func asciiEqualFold(s, t []byte) bool { 123 if len(s) != len(t) { 124 return false 125 } 126 for i, sb := range s { 127 tb := t[i] 128 if sb == tb { 129 continue 130 } 131 if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') { 132 if sb&caseMask != tb&caseMask { 133 return false 134 } 135 } else { 136 return false 137 } 138 } 139 return true 140 } 141 142 // simpleLetterEqualFold is a specialization of bytes.EqualFold for 143 // use when s is all ASCII letters (no underscores, etc) and also 144 // doesn't contain 'k', 'K', 's', or 'S'. 145 // See comments on foldFunc. 146 func simpleLetterEqualFold(s, t []byte) bool { 147 if len(s) != len(t) { 148 return false 149 } 150 for i, b := range s { 151 if b&caseMask != t[i]&caseMask { 152 return false 153 } 154 } 155 return true 156 }