github.com/go-xe2/third@v1.0.3/golang.org/x/text/internal/export/idna/idna_test.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package idna 6 7 import ( 8 "fmt" 9 "strconv" 10 "strings" 11 "testing" 12 13 "github.com/go-xe2/third/golang.org/x/text/internal/gen" 14 "github.com/go-xe2/third/golang.org/x/text/internal/testtext" 15 "github.com/go-xe2/third/golang.org/x/text/internal/ucd" 16 ) 17 18 func TestAllocToUnicode(t *testing.T) { 19 avg := testtext.AllocsPerRun(1000, func() { 20 ToUnicode("www.golang.org") 21 }) 22 if avg > 0 { 23 t.Errorf("got %f; want 0", avg) 24 } 25 } 26 27 func TestAllocToASCII(t *testing.T) { 28 avg := testtext.AllocsPerRun(1000, func() { 29 ToASCII("www.golang.org") 30 }) 31 if avg > 0 { 32 t.Errorf("got %f; want 0", avg) 33 } 34 } 35 36 func TestProfiles(t *testing.T) { 37 testCases := []struct { 38 name string 39 want, got *Profile 40 }{ 41 {"Punycode", punycode, New()}, 42 {"Registration", registration, New(ValidateForRegistration())}, 43 {"Registration", registration, New( 44 ValidateForRegistration(), 45 VerifyDNSLength(true), 46 BidiRule(), 47 )}, 48 {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))}, 49 {"Display", display, New(MapForLookup(), BidiRule())}, 50 } 51 for _, tc := range testCases { 52 // Functions are not comparable, but the printed version will include 53 // their pointers. 54 got := fmt.Sprintf("%#v", tc.got) 55 want := fmt.Sprintf("%#v", tc.want) 56 if got != want { 57 t.Errorf("%s: \ngot %#v,\nwant %#v", tc.name, got, want) 58 } 59 } 60 } 61 62 // doTest performs a single test f(input) and verifies that the output matches 63 // out and that the returned error is expected. The errors string contains 64 // all allowed error codes as categorized in 65 // http://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt: 66 // P: Processing 67 // V: Validity 68 // A: to ASCII 69 // B: Bidi 70 // C: Context J 71 func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) { 72 errors = strings.Trim(errors, "[]") 73 test := "ok" 74 if errors != "" { 75 test = "err:" + errors 76 } 77 // Replace some of the escape sequences to make it easier to single out 78 // tests on the command name. 79 in := strings.Trim(strconv.QuoteToASCII(input), `"`) 80 in = strings.Replace(in, `\u`, "#", -1) 81 in = strings.Replace(in, `\U`, "#", -1) 82 name = fmt.Sprintf("%s/%s/%s", name, in, test) 83 84 testtext.Run(t, name, func(t *testing.T) { 85 got, err := f(input) 86 87 if err != nil { 88 code := err.(interface { 89 code() string 90 }).code() 91 if strings.Index(errors, code) == -1 { 92 t.Errorf("error %q not in set of expected errors {%v}", code, errors) 93 } 94 } else if errors != "" { 95 t.Errorf("no errors; want error in {%v}", errors) 96 } 97 98 if want != "" && got != want { 99 t.Errorf(`string: got %+q; want %+q`, got, want) 100 } 101 }) 102 } 103 104 // TestLabelErrors tests strings returned in case of error. All results should 105 // be identical to the reference implementation and can be verified at 106 // http://unicode.org/cldr/utility/idna.jsp. The reference implementation, 107 // however, seems to not display Bidi and ContextJ errors. 108 // 109 // In some cases the behavior of browsers is added as a comment. In all cases, 110 // whenever a resolve search returns an error here, Chrome will treat the input 111 // string as a search string (including those for Bidi and Context J errors), 112 // unless noted otherwise. 113 func TestLabelErrors(t *testing.T) { 114 encode := func(s string) string { s, _ = encode(acePrefix, s); return s } 115 type kind struct { 116 name string 117 f func(string) (string, error) 118 } 119 punyA := kind{"PunycodeA", punycode.ToASCII} 120 resolve := kind{"ResolveA", Lookup.ToASCII} 121 display := kind{"ToUnicode", Display.ToUnicode} 122 p := New(VerifyDNSLength(true), MapForLookup(), BidiRule()) 123 lengthU := kind{"CheckLengthU", p.ToUnicode} 124 lengthA := kind{"CheckLengthA", p.ToASCII} 125 p = New(MapForLookup(), StrictDomainName(false)) 126 std3 := kind{"STD3", p.ToASCII} 127 128 testCases := []struct { 129 kind 130 input string 131 want string 132 wantErr string 133 }{ 134 {lengthU, "", "", "A4"}, // From UTS 46 conformance test. 135 {lengthA, "", "", "A4"}, 136 137 {lengthU, "xn--", "", "A4"}, 138 {lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct? 139 {lengthU, "xn--.foo", ".foo", "A4"}, 140 {lengthU, "foo.xn--.bar", "foo..bar", "A4"}, 141 142 {display, "xn--", "", ""}, 143 {display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct? 144 {display, "xn--.foo", ".foo", ""}, 145 {display, "foo.xn--.bar", "foo..bar", ""}, 146 147 {lengthA, "a..b", "a..b", "A4"}, 148 {punyA, ".b", ".b", ""}, 149 // For backwards compatibility, the Punycode profile does not map runes. 150 {punyA, "\u3002b", "xn--b-83t", ""}, 151 {punyA, "..b", "..b", ""}, 152 153 {lengthA, ".b", ".b", "A4"}, 154 {lengthA, "\u3002b", ".b", "A4"}, 155 {lengthA, "..b", "..b", "A4"}, 156 {lengthA, "b..", "b..", ""}, 157 158 // Sharpened Bidi rules for Unicode 10.0.0. Apply for ALL labels in ANY 159 // of the labels is RTL. 160 {lengthA, "\ufe05\u3002\u3002\U0002603e\u1ce0", "..xn--t6f5138v", "A4"}, 161 {lengthA, "FAX\u2a77\U0001d186\u3002\U0001e942\U000e0181\u180c", "", "B6"}, 162 163 {resolve, "a..b", "a..b", ""}, 164 // Note that leading dots are not stripped. This is to be consistent 165 // with the Punycode profile as well as the conformance test. 166 {resolve, ".b", ".b", ""}, 167 {resolve, "\u3002b", ".b", ""}, 168 {resolve, "..b", "..b", ""}, 169 {resolve, "b..", "b..", ""}, 170 {resolve, "\xed", "", "P1"}, 171 172 // Raw punycode 173 {punyA, "", "", ""}, 174 {punyA, "*.foo.com", "*.foo.com", ""}, 175 {punyA, "Foo.com", "Foo.com", ""}, 176 177 // STD3 rules 178 {display, "*.foo.com", "*.foo.com", "P1"}, 179 {std3, "*.foo.com", "*.foo.com", ""}, 180 181 // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of 182 // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return 183 // lab9.be. 184 {resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") 185 {display, "lab⒐be", "lab⒐be", "P1"}, 186 187 {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" 188 {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, 189 190 // Chrome 54.0 recognizes the error and treats this input verbatim as a 191 // search string. 192 // Safari 10.0 (non-conform spec) decomposes "⒈" and computes the 193 // punycode on the result using transitional mapping. 194 // Firefox 49.0.1 goes haywire on this string and prints a bunch of what 195 // seems to be nested punycode encodings. 196 {resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"}, 197 {display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"}, 198 199 {resolve, "a\u200Cb", "ab", ""}, 200 {display, "a\u200Cb", "a\u200Cb", "C"}, 201 202 {resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"}, 203 {display, "a\u200Cb", "a\u200Cb", "C"}, 204 205 {resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"}, 206 { 207 // Notice how the string gets transformed, even with an error. 208 // Chrome will use the original string if it finds an error, so not 209 // the transformed one. 210 display, 211 "gr\ufecb\ufeae\ufe91\ufef2.de", 212 "gr\u0639\u0631\u0628\u064a.de", 213 "B", 214 }, 215 216 {resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ 217 {display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"}, 218 219 // normalize input 220 {resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢ 221 {display, "a\u0323\u0322", "\u1ea1\u0322", ""}, 222 223 // Non-normalized strings are not normalized when they originate from 224 // punycode. Despite the error, Chrome, Safari and Firefox will attempt 225 // to look up the input punycode. 226 {resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"}, 227 {display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"}, 228 } 229 230 for _, tc := range testCases { 231 doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr) 232 } 233 } 234 235 func TestConformance(t *testing.T) { 236 testtext.SkipIfNotLong(t) 237 238 r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt") 239 defer r.Close() 240 241 section := "main" 242 started := false 243 p := ucd.New(r, ucd.CommentHandler(func(s string) { 244 if started { 245 section = strings.ToLower(strings.Split(s, " ")[0]) 246 } 247 })) 248 transitional := New(Transitional(true), VerifyDNSLength(true), BidiRule(), MapForLookup()) 249 nonTransitional := New(VerifyDNSLength(true), BidiRule(), MapForLookup()) 250 for p.Next() { 251 started = true 252 253 // What to test 254 profiles := []*Profile{} 255 switch p.String(0) { 256 case "T": 257 profiles = append(profiles, transitional) 258 case "N": 259 profiles = append(profiles, nonTransitional) 260 case "B": 261 profiles = append(profiles, transitional) 262 profiles = append(profiles, nonTransitional) 263 } 264 265 src := unescape(p.String(1)) 266 267 wantToUnicode := unescape(p.String(2)) 268 if wantToUnicode == "" { 269 wantToUnicode = src 270 } 271 wantToASCII := unescape(p.String(3)) 272 if wantToASCII == "" { 273 wantToASCII = wantToUnicode 274 } 275 wantErrToUnicode := "" 276 if strings.HasPrefix(wantToUnicode, "[") { 277 wantErrToUnicode = wantToUnicode 278 wantToUnicode = "" 279 } 280 wantErrToASCII := "" 281 if strings.HasPrefix(wantToASCII, "[") { 282 wantErrToASCII = wantToASCII 283 wantToASCII = "" 284 } 285 286 // TODO: also do IDNA tests. 287 // invalidInIDNA2008 := p.String(4) == "NV8" 288 289 for _, p := range profiles { 290 name := fmt.Sprintf("%s:%s", section, p) 291 doTest(t, p.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode) 292 doTest(t, p.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII) 293 } 294 } 295 } 296 297 func unescape(s string) string { 298 s, err := strconv.Unquote(`"` + s + `"`) 299 if err != nil { 300 panic(err) 301 } 302 return s 303 } 304 305 func BenchmarkProfile(b *testing.B) { 306 for i := 0; i < b.N; i++ { 307 Lookup.ToASCII("www.yahoogle.com") 308 } 309 }