gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/net/idna/punycode_test.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package idna 6 7 import ( 8 "strings" 9 "testing" 10 ) 11 12 var punycodeTestCases = [...]struct { 13 s, encoded string 14 }{ 15 {"", ""}, 16 {"-", "--"}, 17 {"-a", "-a-"}, 18 {"-a-", "-a--"}, 19 {"a", "a-"}, 20 {"a-", "a--"}, 21 {"a-b", "a-b-"}, 22 {"books", "books-"}, 23 {"bücher", "bcher-kva"}, 24 {"Hello世界", "Hello-ck1hg65u"}, 25 {"ü", "tda"}, 26 {"üý", "tdac"}, 27 28 // The test cases below come from RFC 3492 section 7.1 with Errata 3026. 29 { 30 // (A) Arabic (Egyptian). 31 "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" + 32 "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F", 33 "egbpdaj6bu4bxfgehfvwxn", 34 }, 35 { 36 // (B) Chinese (simplified). 37 "\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587", 38 "ihqwcrb4cv8a8dqg056pqjye", 39 }, 40 { 41 // (C) Chinese (traditional). 42 "\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587", 43 "ihqwctvzc91f659drss3x8bo0yb", 44 }, 45 { 46 // (D) Czech. 47 "\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074" + 48 "\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D" + 49 "\u0065\u0073\u006B\u0079", 50 "Proprostnemluvesky-uyb24dma41a", 51 }, 52 { 53 // (E) Hebrew. 54 "\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8" + 55 "\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2" + 56 "\u05D1\u05E8\u05D9\u05EA", 57 "4dbcagdahymbxekheh6e0a7fei0b", 58 }, 59 { 60 // (F) Hindi (Devanagari). 61 "\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D" + 62 "\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939" + 63 "\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947" + 64 "\u0939\u0948\u0902", 65 "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 66 }, 67 { 68 // (G) Japanese (kanji and hiragana). 69 "\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092" + 70 "\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B", 71 "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 72 }, 73 { 74 // (H) Korean (Hangul syllables). 75 "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774" + 76 "\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74" + 77 "\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C", 78 "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j" + 79 "psd879ccm6fea98c", 80 }, 81 { 82 // (I) Russian (Cyrillic). 83 "\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E" + 84 "\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440" + 85 "\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A" + 86 "\u0438", 87 "b1abfaaepdrnnbgefbadotcwatmq2g4l", 88 }, 89 { 90 // (J) Spanish. 91 "\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070" + 92 "\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070" + 93 "\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061" + 94 "\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070" + 95 "\u0061\u00F1\u006F\u006C", 96 "PorqunopuedensimplementehablarenEspaol-fmd56a", 97 }, 98 { 99 // (K) Vietnamese. 100 "\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B" + 101 "\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068" + 102 "\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067" + 103 "\u0056\u0069\u1EC7\u0074", 104 "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 105 }, 106 { 107 // (L) 3<nen>B<gumi><kinpachi><sensei>. 108 "\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F", 109 "3B-ww4c5e180e575a65lsy2b", 110 }, 111 { 112 // (M) <amuro><namie>-with-SUPER-MONKEYS. 113 "\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074" + 114 "\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D" + 115 "\u004F\u004E\u004B\u0045\u0059\u0053", 116 "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 117 }, 118 { 119 // (N) Hello-Another-Way-<sorezore><no><basho>. 120 "\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F" + 121 "\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D" + 122 "\u305D\u308C\u305E\u308C\u306E\u5834\u6240", 123 "Hello-Another-Way--fc4qua05auwb3674vfr0b", 124 }, 125 { 126 // (O) <hitotsu><yane><no><shita>2. 127 "\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032", 128 "2-u9tlzr9756bt3uc0v", 129 }, 130 { 131 // (P) Maji<de>Koi<suru>5<byou><mae> 132 "\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059" + 133 "\u308B\u0035\u79D2\u524D", 134 "MajiKoi5-783gue6qz075azm5e", 135 }, 136 { 137 // (Q) <pafii>de<runba> 138 "\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0", 139 "de-jg4avhby1noc0d", 140 }, 141 { 142 // (R) <sono><supiido><de> 143 "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067", 144 "d9juau41awczczp", 145 }, 146 { 147 // (S) -> $1.00 <- 148 "\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020" + 149 "\u003C\u002D", 150 "-> $1.00 <--", 151 }, 152 } 153 154 func TestPunycode(t *testing.T) { 155 for _, tc := range punycodeTestCases { 156 if got, err := decode(tc.encoded); err != nil { 157 t.Errorf("decode(%q): %v", tc.encoded, err) 158 } else if got != tc.s { 159 t.Errorf("decode(%q): got %q, want %q", tc.encoded, got, tc.s) 160 } 161 162 if got, err := encode("", tc.s); err != nil { 163 t.Errorf(`encode("", %q): %v`, tc.s, err) 164 } else if got != tc.encoded { 165 t.Errorf(`encode("", %q): got %q, want %q`, tc.s, got, tc.encoded) 166 } 167 } 168 } 169 170 var punycodeErrorTestCases = [...]string{ 171 "decode -", // A sole '-' is invalid. 172 "decode foo\x00bar", // '\x00' is not in [0-9A-Za-z]. 173 "decode foo#bar", // '#' is not in [0-9A-Za-z]. 174 "decode foo\u00A3bar", // '\u00A3' is not in [0-9A-Za-z]. 175 "decode 9", // "9a" decodes to codepoint \u00A3; "9" is truncated. 176 "decode 99999a", // "99999a" decodes to codepoint \U0048A3C1, which is > \U0010FFFF. 177 "decode 9999999999a", // "9999999999a" overflows the int32 calculation. 178 179 "encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow. 180 } 181 182 func TestPunycodeErrors(t *testing.T) { 183 for _, tc := range punycodeErrorTestCases { 184 var err error 185 switch { 186 case strings.HasPrefix(tc, "decode "): 187 _, err = decode(tc[7:]) 188 case strings.HasPrefix(tc, "encode "): 189 _, err = encode("", tc[7:]) 190 } 191 if err == nil { 192 if len(tc) > 256 { 193 tc = tc[:100] + "..." + tc[len(tc)-100:] 194 } 195 t.Errorf("no error for %s", tc) 196 } 197 } 198 }