golang.org/x/text@v0.14.0/secure/precis/enforce10.0.0_test.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build go1.10 6 7 package precis 8 9 import ( 10 "strings" 11 12 "golang.org/x/text/runes" 13 "golang.org/x/text/secure/bidirule" 14 ) 15 16 var enforceTestCases = []struct { 17 name string 18 p *Profile 19 cases []testCase 20 }{ 21 {"Basic", NewFreeform(), []testCase{ 22 {"e\u0301\u031f", "\u00e9\u031f", nil}, // normalize 23 }}, 24 25 {"Context Rule 1", NewFreeform(), []testCase{ 26 // Rule 1: zero-width non-joiner (U+200C) 27 // From RFC: 28 // False 29 // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; 30 // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C 31 // (Joining_Type:T)*(Joining_Type:{R,D})) Then True; 32 // 33 // Example runes for different joining types: 34 // Join L: U+A872; PHAGS-PA SUPERFIXED LETTER RA 35 // Join D: U+062C; HAH WITH DOT BELOW 36 // Join T: U+0610; ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM 37 // Join R: U+0627; ALEF 38 // Virama: U+0A4D; GURMUKHI SIGN VIRAMA 39 // Virama and Join T: U+0ACD; GUJARATI SIGN VIRAMA 40 {"\u200c", "", errContext}, 41 {"\u200ca", "", errContext}, 42 {"a\u200c", "", errContext}, 43 {"\u200c\u0627", "", errContext}, // missing JoinStart 44 {"\u062c\u200c", "", errContext}, // missing JoinEnd 45 {"\u0610\u200c\u0610\u0627", "", errContext}, // missing JoinStart 46 {"\u062c\u0610\u200c\u0610", "", errContext}, // missing JoinEnd 47 48 // Variants of: D T* U+200c T* R 49 {"\u062c\u200c\u0627", "\u062c\u200c\u0627", nil}, 50 {"\u062c\u0610\u200c\u0610\u0627", "\u062c\u0610\u200c\u0610\u0627", nil}, 51 {"\u062c\u0610\u0610\u200c\u0610\u0610\u0627", "\u062c\u0610\u0610\u200c\u0610\u0610\u0627", nil}, 52 {"\u062c\u0610\u200c\u0627", "\u062c\u0610\u200c\u0627", nil}, 53 {"\u062c\u200c\u0610\u0627", "\u062c\u200c\u0610\u0627", nil}, 54 55 // Variants of: L T* U+200c T* D 56 {"\ua872\u200c\u062c", "\ua872\u200c\u062c", nil}, 57 {"\ua872\u0610\u200c\u0610\u062c", "\ua872\u0610\u200c\u0610\u062c", nil}, 58 {"\ua872\u0610\u0610\u200c\u0610\u0610\u062c", "\ua872\u0610\u0610\u200c\u0610\u0610\u062c", nil}, 59 {"\ua872\u0610\u200c\u062c", "\ua872\u0610\u200c\u062c", nil}, 60 {"\ua872\u200c\u0610\u062c", "\ua872\u200c\u0610\u062c", nil}, 61 62 // Virama 63 {"\u0a4d\u200c", "\u0a4d\u200c", nil}, 64 {"\ua872\u0a4d\u200c", "\ua872\u0a4d\u200c", nil}, 65 {"\ua872\u0a4d\u0610\u200c", "", errContext}, 66 {"\ua872\u0a4d\u0610\u200c", "", errContext}, 67 68 {"\u0acd\u200c", "\u0acd\u200c", nil}, 69 {"\ua872\u0acd\u200c", "\ua872\u0acd\u200c", nil}, 70 {"\ua872\u0acd\u0610\u200c", "", errContext}, 71 {"\ua872\u0acd\u0610\u200c", "", errContext}, 72 73 // Using Virama as join T 74 {"\ua872\u0acd\u200c\u062c", "\ua872\u0acd\u200c\u062c", nil}, 75 {"\ua872\u200c\u0acd\u062c", "\ua872\u200c\u0acd\u062c", nil}, 76 }}, 77 78 {"Context Rule 2", NewFreeform(), []testCase{ 79 // Rule 2: zero-width joiner (U+200D) 80 {"\u200d", "", errContext}, 81 {"\u200da", "", errContext}, 82 {"a\u200d", "", errContext}, 83 84 {"\u0a4d\u200d", "\u0a4d\u200d", nil}, 85 {"\ua872\u0a4d\u200d", "\ua872\u0a4d\u200d", nil}, 86 {"\u0a4da\u200d", "", errContext}, 87 }}, 88 89 {"Context Rule 3", NewFreeform(), []testCase{ 90 // Rule 3: middle dot 91 {"·", "", errContext}, 92 {"l·", "", errContext}, 93 {"·l", "", errContext}, 94 {"a·", "", errContext}, 95 {"l·a", "", errContext}, 96 {"a·a", "", errContext}, 97 {"l·l", "l·l", nil}, 98 {"al·la", "al·la", nil}, 99 }}, 100 101 {"Context Rule 4", NewFreeform(), []testCase{ 102 // Rule 4: Greek lower numeral U+0375 103 {"͵", "", errContext}, 104 {"͵a", "", errContext}, 105 {"α͵", "", errContext}, 106 {"͵α", "͵α", nil}, 107 {"α͵α", "α͵α", nil}, 108 {"͵͵α", "͵͵α", nil}, // The numeric sign is itself Greek. 109 {"α͵͵α", "α͵͵α", nil}, 110 {"α͵͵", "", errContext}, 111 {"α͵͵a", "", errContext}, 112 }}, 113 114 {"Context Rule 5+6", NewFreeform(), []testCase{ 115 // Rule 5+6: Hebrew preceding 116 // U+05f3: Geresh 117 {"׳", "", errContext}, 118 {"׳ה", "", errContext}, 119 {"a׳b", "", errContext}, 120 {"ש׳", "ש׳", nil}, // U+05e9 U+05f3 121 {"ש׳׳׳", "ש׳׳׳", nil}, // U+05e9 U+05f3 122 123 // U+05f4: Gershayim 124 {"״", "", errContext}, 125 {"״ה", "", errContext}, 126 {"a״b", "", errContext}, 127 {"ש״", "ש״", nil}, // U+05e9 U+05f4 128 {"ש״״״", "ש״״״", nil}, // U+05e9 U+05f4 129 {"aש״״״", "aש״״״", nil}, // U+05e9 U+05f4 130 }}, 131 132 {"Context Rule 7", NewFreeform(), []testCase{ 133 // Rule 7: Katakana middle Dot 134 {"・", "", errContext}, 135 {"abc・", "", errContext}, 136 {"・def", "", errContext}, 137 {"abc・def", "", errContext}, 138 {"aヅc・def", "aヅc・def", nil}, 139 {"abc・dぶf", "abc・dぶf", nil}, 140 {"⺐bc・def", "⺐bc・def", nil}, 141 }}, 142 143 {"Context Rule 8+9", NewFreeform(), []testCase{ 144 // Rule 8+9: Arabic Indic Digit 145 {"١٢٣٤٥۶", "", errContext}, 146 {"۱۲۳۴۵٦", "", errContext}, 147 {"١٢٣٤٥", "١٢٣٤٥", nil}, 148 {"۱۲۳۴۵", "۱۲۳۴۵", nil}, 149 }}, 150 151 {"Nickname", Nickname, []testCase{ 152 {" Swan of Avon ", "Swan of Avon", nil}, 153 {"", "", errEmptyString}, 154 {" ", "", errEmptyString}, 155 {" ", "", errEmptyString}, 156 {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil}, 157 {"Foo", "Foo", nil}, 158 {"foo", "foo", nil}, 159 {"Foo Bar", "Foo Bar", nil}, 160 {"foo bar", "foo bar", nil}, 161 {"\u03A3", "\u03A3", nil}, 162 {"\u03C3", "\u03C3", nil}, 163 // Greek final sigma is left as is (do not fold!) 164 {"\u03C2", "\u03C2", nil}, 165 {"\u265A", "♚", nil}, 166 {"Richard \u2163", "Richard IV", nil}, 167 {"\u212B", "Å", nil}, 168 {"\uFB00", "ff", nil}, // because of NFKC 169 {"שa", "שa", nil}, // no bidi rule 170 {"동일조건변경허락", "동일조건변경허락", nil}, 171 }}, 172 {"OpaqueString", OpaqueString, []testCase{ 173 {" Swan of Avon ", " Swan of Avon ", nil}, 174 {"", "", errEmptyString}, 175 {" ", " ", nil}, 176 {" ", " ", nil}, 177 {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil}, 178 {"Foo", "Foo", nil}, 179 {"foo", "foo", nil}, 180 {"Foo Bar", "Foo Bar", nil}, 181 {"foo bar", "foo bar", nil}, 182 {"\u03C3", "\u03C3", nil}, 183 {"Richard \u2163", "Richard \u2163", nil}, 184 {"\u212B", "Å", nil}, 185 {"Jack of \u2666s", "Jack of \u2666s", nil}, 186 {"my cat is a \u0009by", "", errDisallowedRune}, 187 {"שa", "שa", nil}, // no bidi rule 188 }}, 189 {"UsernameCaseMapped", UsernameCaseMapped, []testCase{ 190 // TODO: Should this work? 191 // {UsernameCaseMapped, "", "", errDisallowedRune}, 192 {"juliet@example.com", "juliet@example.com", nil}, 193 {"fussball", "fussball", nil}, 194 {"fu\u00DFball", "fu\u00DFball", nil}, 195 {"\u03C0", "\u03C0", nil}, 196 {"\u03A3", "\u03C3", nil}, 197 {"\u03C3", "\u03C3", nil}, 198 // Greek final sigma is left as is (do not fold!) 199 {"\u03C2", "\u03C2", nil}, 200 {"\u0049", "\u0069", nil}, 201 {"\u0049", "\u0069", nil}, 202 {"\u03D2", "", errDisallowedRune}, 203 {"\u03B0", "\u03B0", nil}, 204 {"foo bar", "", errDisallowedRune}, 205 {"♚", "", bidirule.ErrInvalid}, 206 {"\u007E", "~", nil}, 207 {"a", "a", nil}, 208 {"!", "!", nil}, 209 {"²", "", bidirule.ErrInvalid}, 210 {"\t", "", errDisallowedRune}, 211 {"\n", "", errDisallowedRune}, 212 {"\u26D6", "", bidirule.ErrInvalid}, 213 {"\u26FF", "", bidirule.ErrInvalid}, 214 {"\uFB00", "", errDisallowedRune}, 215 {"\u1680", "", bidirule.ErrInvalid}, 216 {" ", "", errDisallowedRune}, 217 {" ", "", errDisallowedRune}, 218 {"\u01C5", "", errDisallowedRune}, 219 {"\u16EE", "", errDisallowedRune}, // Nl RUNIC ARLAUG SYMBOL 220 {"\u0488", "", bidirule.ErrInvalid}, // Me COMBINING CYRILLIC HUNDRED THOUSANDS SIGN 221 {"\u212B", "\u00e5", nil}, // Angstrom sign, NFC -> U+00E5 222 {"A\u030A", "å", nil}, // A + ring 223 {"\u00C5", "å", nil}, // A with ring 224 {"\u00E7", "ç", nil}, // c cedille 225 {"\u0063\u0327", "ç", nil}, // c + cedille 226 {"\u0158", "ř", nil}, 227 {"\u0052\u030C", "ř", nil}, 228 229 {"\u1E61", "\u1E61", nil}, // LATIN SMALL LETTER S WITH DOT ABOVE 230 231 // Confusable characters ARE allowed and should NOT be mapped. 232 {"\u0410", "\u0430", nil}, // CYRILLIC CAPITAL LETTER A 233 234 // Full width should be mapped to the canonical decomposition. 235 {"AB", "ab", nil}, 236 {"שc", "", bidirule.ErrInvalid}, // bidi rule 237 238 }}, 239 {"UsernameCasePreserved", UsernameCasePreserved, []testCase{ 240 {"ABC", "ABC", nil}, 241 {"AB", "AB", nil}, 242 {"שc", "", bidirule.ErrInvalid}, // bidi rule 243 {"\uFB00", "", errDisallowedRune}, 244 {"\u212B", "\u00c5", nil}, // Angstrom sign, NFC -> U+00E5 245 {"ẛ", "", errDisallowedRune}, // LATIN SMALL LETTER LONG S WITH DOT ABOVE 246 }}, 247 {"UsernameCaseMappedRestricted", NewRestrictedProfile(UsernameCaseMapped, runes.Predicate(func(r rune) bool { 248 return strings.ContainsRune(`@`, r) 249 })), []testCase{ 250 {"juliet@example.com", "", errDisallowedRune}, 251 {"\u0049", "\u0069", nil}, 252 }}, 253 }