golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/idna/punycode_test.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package idna
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  )
    11  
    12  var punycodeTestCases = [...]struct {
    13  	s, encoded string
    14  }{
    15  	{"", ""},
    16  	{"-", "--"},
    17  	{"-a", "-a-"},
    18  	{"-a-", "-a--"},
    19  	{"a", "a-"},
    20  	{"a-", "a--"},
    21  	{"a-b", "a-b-"},
    22  	{"books", "books-"},
    23  	{"bücher", "bcher-kva"},
    24  	{"Hello世界", "Hello-ck1hg65u"},
    25  	{"ü", "tda"},
    26  	{"üý", "tdac"},
    27  
    28  	// The test cases below come from RFC 3492 section 7.1 with Errata 3026.
    29  	{
    30  		// (A) Arabic (Egyptian).
    31  		"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" +
    32  			"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
    33  		"egbpdaj6bu4bxfgehfvwxn",
    34  	},
    35  	{
    36  		// (B) Chinese (simplified).
    37  		"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
    38  		"ihqwcrb4cv8a8dqg056pqjye",
    39  	},
    40  	{
    41  		// (C) Chinese (traditional).
    42  		"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
    43  		"ihqwctvzc91f659drss3x8bo0yb",
    44  	},
    45  	{
    46  		// (D) Czech.
    47  		"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074" +
    48  			"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D" +
    49  			"\u0065\u0073\u006B\u0079",
    50  		"Proprostnemluvesky-uyb24dma41a",
    51  	},
    52  	{
    53  		// (E) Hebrew.
    54  		"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8" +
    55  			"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2" +
    56  			"\u05D1\u05E8\u05D9\u05EA",
    57  		"4dbcagdahymbxekheh6e0a7fei0b",
    58  	},
    59  	{
    60  		// (F) Hindi (Devanagari).
    61  		"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D" +
    62  			"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939" +
    63  			"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947" +
    64  			"\u0939\u0948\u0902",
    65  		"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
    66  	},
    67  	{
    68  		// (G) Japanese (kanji and hiragana).
    69  		"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092" +
    70  			"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
    71  		"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
    72  	},
    73  	{
    74  		// (H) Korean (Hangul syllables).
    75  		"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774" +
    76  			"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74" +
    77  			"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
    78  		"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j" +
    79  			"psd879ccm6fea98c",
    80  	},
    81  	{
    82  		// (I) Russian (Cyrillic).
    83  		"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E" +
    84  			"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440" +
    85  			"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A" +
    86  			"\u0438",
    87  		"b1abfaaepdrnnbgefbadotcwatmq2g4l",
    88  	},
    89  	{
    90  		// (J) Spanish.
    91  		"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070" +
    92  			"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070" +
    93  			"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061" +
    94  			"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070" +
    95  			"\u0061\u00F1\u006F\u006C",
    96  		"PorqunopuedensimplementehablarenEspaol-fmd56a",
    97  	},
    98  	{
    99  		// (K) Vietnamese.
   100  		"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B" +
   101  			"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068" +
   102  			"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067" +
   103  			"\u0056\u0069\u1EC7\u0074",
   104  		"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
   105  	},
   106  	{
   107  		// (L) 3<nen>B<gumi><kinpachi><sensei>.
   108  		"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
   109  		"3B-ww4c5e180e575a65lsy2b",
   110  	},
   111  	{
   112  		// (M) <amuro><namie>-with-SUPER-MONKEYS.
   113  		"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074" +
   114  			"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D" +
   115  			"\u004F\u004E\u004B\u0045\u0059\u0053",
   116  		"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
   117  	},
   118  	{
   119  		// (N) Hello-Another-Way-<sorezore><no><basho>.
   120  		"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F" +
   121  			"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D" +
   122  			"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
   123  		"Hello-Another-Way--fc4qua05auwb3674vfr0b",
   124  	},
   125  	{
   126  		// (O) <hitotsu><yane><no><shita>2.
   127  		"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
   128  		"2-u9tlzr9756bt3uc0v",
   129  	},
   130  	{
   131  		// (P) Maji<de>Koi<suru>5<byou><mae>
   132  		"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059" +
   133  			"\u308B\u0035\u79D2\u524D",
   134  		"MajiKoi5-783gue6qz075azm5e",
   135  	},
   136  	{
   137  		// (Q) <pafii>de<runba>
   138  		"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
   139  		"de-jg4avhby1noc0d",
   140  	},
   141  	{
   142  		// (R) <sono><supiido><de>
   143  		"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
   144  		"d9juau41awczczp",
   145  	},
   146  	{
   147  		// (S) -> $1.00 <-
   148  		"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020" +
   149  			"\u003C\u002D",
   150  		"-> $1.00 <--",
   151  	},
   152  }
   153  
   154  func TestPunycode(t *testing.T) {
   155  	for _, tc := range punycodeTestCases {
   156  		if got, err := decode(tc.encoded); err != nil {
   157  			t.Errorf("decode(%q): %v", tc.encoded, err)
   158  		} else if got != tc.s {
   159  			t.Errorf("decode(%q): got %q, want %q", tc.encoded, got, tc.s)
   160  		}
   161  
   162  		if got, err := encode("", tc.s); err != nil {
   163  			t.Errorf(`encode("", %q): %v`, tc.s, err)
   164  		} else if got != tc.encoded {
   165  			t.Errorf(`encode("", %q): got %q, want %q`, tc.s, got, tc.encoded)
   166  		}
   167  	}
   168  }
   169  
   170  var punycodeErrorTestCases = [...]string{
   171  	"decode -",            // A sole '-' is invalid.
   172  	"decode foo\x00bar",   // '\x00' is not in [0-9A-Za-z].
   173  	"decode foo#bar",      // '#' is not in [0-9A-Za-z].
   174  	"decode foo\u00A3bar", // '\u00A3' is not in [0-9A-Za-z].
   175  	"decode 9",            // "9a" decodes to codepoint \u00A3; "9" is truncated.
   176  	"decode 99999a",       // "99999a" decodes to codepoint \U0048A3C1, which is > \U0010FFFF.
   177  	"decode 9999999999a",  // "9999999999a" overflows the int32 calculation.
   178  
   179  	"encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow.
   180  }
   181  
   182  func TestPunycodeErrors(t *testing.T) {
   183  	for _, tc := range punycodeErrorTestCases {
   184  		var err error
   185  		switch {
   186  		case strings.HasPrefix(tc, "decode "):
   187  			_, err = decode(tc[7:])
   188  		case strings.HasPrefix(tc, "encode "):
   189  			_, err = encode("", tc[7:])
   190  		}
   191  		if err == nil {
   192  			if len(tc) > 256 {
   193  				tc = tc[:100] + "..." + tc[len(tc)-100:]
   194  			}
   195  			t.Errorf("no error for %s", tc)
   196  		}
   197  	}
   198  }