github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/encoding/json/fold.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 import ( 8 "bytes" 9 "unicode/utf8" 10 ) 11 12 const ( 13 caseMask = ^byte(0x20) // Mask to ignore case in ASCII. 14 kelvin = '\u212a' 15 smallLongEss = '\u017f' 16 ) 17 18 // foldFunc returns one of four different case folding equivalence 19 // functions, from most general (and slow) to fastest: 20 // 21 // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8 22 // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S') 23 // 3) asciiEqualFold, no special, but includes non-letters (including _) 24 // 4) simpleLetterEqualFold, no specials, no non-letters. 25 // 26 // The letters S and K are special because they map to 3 runes, not just 2: 27 // * S maps to s and to U+017F 'ſ' Latin small letter long s 28 // * k maps to K and to U+212A 'K' Kelvin sign 29 // See http://play.golang.org/p/tTxjOc0OGo 30 // 31 // The returned function is specialized for matching against s and 32 // should only be given s. It's not curried for performance reasons. 33 func foldFunc(s []byte) func(s, t []byte) bool { 34 nonLetter := false 35 special := false // special letter 36 for _, b := range s { 37 if b >= utf8.RuneSelf { 38 return bytes.EqualFold 39 } 40 upper := b & caseMask 41 if upper < 'A' || upper > 'Z' { 42 nonLetter = true 43 } else if upper == 'K' || upper == 'S' { 44 // See above for why these letters are special. 45 special = true 46 } 47 } 48 if special { 49 return equalFoldRight 50 } 51 if nonLetter { 52 return asciiEqualFold 53 } 54 return simpleLetterEqualFold 55 } 56 57 // equalFoldRight is a specialization of bytes.EqualFold when s is 58 // known to be all ASCII (including punctuation), but contains an 's', 59 // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t. 60 // See comments on foldFunc. 61 func equalFoldRight(s, t []byte) bool { 62 for _, sb := range s { 63 if len(t) == 0 { 64 return false 65 } 66 tb := t[0] 67 if tb < utf8.RuneSelf { 68 if sb != tb { 69 sbUpper := sb & caseMask 70 if 'A' <= sbUpper && sbUpper <= 'Z' { 71 if sbUpper != tb&caseMask { 72 return false 73 } 74 } else { 75 return false 76 } 77 } 78 t = t[1:] 79 continue 80 } 81 // sb is ASCII and t is not. t must be either kelvin 82 // sign or long s; sb must be s, S, k, or K. 83 tr, size := utf8.DecodeRune(t) 84 switch sb { 85 case 's', 'S': 86 if tr != smallLongEss { 87 return false 88 } 89 case 'k', 'K': 90 if tr != kelvin { 91 return false 92 } 93 default: 94 return false 95 } 96 t = t[size:] 97 98 } 99 if len(t) > 0 { 100 return false 101 } 102 return true 103 } 104 105 // asciiEqualFold is a specialization of bytes.EqualFold for use when 106 // s is all ASCII (but may contain non-letters) and contains no 107 // special-folding letters. 108 // See comments on foldFunc. 109 func asciiEqualFold(s, t []byte) bool { 110 if len(s) != len(t) { 111 return false 112 } 113 for i, sb := range s { 114 tb := t[i] 115 if sb == tb { 116 continue 117 } 118 if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') { 119 if sb&caseMask != tb&caseMask { 120 return false 121 } 122 } else { 123 return false 124 } 125 } 126 return true 127 } 128 129 // simpleLetterEqualFold is a specialization of bytes.EqualFold for 130 // use when s is all ASCII letters (no underscores, etc) and also 131 // doesn't contain 'k', 'K', 's', or 'S'. 132 // See comments on foldFunc. 133 func simpleLetterEqualFold(s, t []byte) bool { 134 if len(s) != len(t) { 135 return false 136 } 137 for i, b := range s { 138 if b&caseMask != t[i]&caseMask { 139 return false 140 } 141 } 142 return true 143 }