golang.org/x/text@v0.14.0/internal/export/idna/idna9.0.0_test.go

golang.org/x/text@v0.14.0/internal/export/idna/idna9.0.0_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !go1.10
     6  
     7  package idna
     8  
     9  import "testing"
    10  
    11  // TestLabelErrors tests the strings returned in case of error. Each table row
    12  // is handed to doTest together with its conversion function, input, expected
    13  // output and expected error code. All results should be identical to the
    14  // reference implementation (https://unicode.org/cldr/utility/idna.jsp), which,
    15  // however, seems to not display Bidi and ContextJ errors.
    16  //
    17  // In some cases the behavior of browsers is added as a comment. Unless noted
    18  // otherwise, whenever a resolve case returns an error here, Chrome will treat
    19  // the input as a search string (this includes Bidi and ContextJ errors).
    20  func TestLabelErrors(t *testing.T) {
    21  	encode := func(s string) string { s, _ = encode(acePrefix, s); return s } // calls the outer encode with acePrefix and discards its second result
    22  	type kind struct {
    23  		name string // label handed to doTest
    24  		f    func(string) (string, error) // conversion under test
    25  	}
    26  	punyA := kind{"PunycodeA", punycode.ToASCII} // punycode, Lookup and Display are declared outside this function
    27  	resolve := kind{"ResolveA", Lookup.ToASCII}
    28  	display := kind{"ToUnicode", Display.ToUnicode}
    29  	p := New(VerifyDNSLength(true), MapForLookup(), BidiRule()) // p is reassigned below; lengthU/lengthA are built from this profile first
    30  	lengthU := kind{"CheckLengthU", p.ToUnicode}
    31  	lengthA := kind{"CheckLengthA", p.ToASCII}
    32  	p = New(MapForLookup(), StrictDomainName(false)) // profile with StrictDomainName turned off
    33  	std3 := kind{"STD3", p.ToASCII}
    34  	p = New(MapForLookup(), CheckHyphens(false)) // profile with CheckHyphens turned off
    35  	hyphens := kind{"CheckHyphens", p.ToASCII}
    36  	p = New(MapForLookup(), Transitional(true))
    37  	transitional := kind{"Transitional", p.ToASCII}
    38  	p = New(MapForLookup(), Transitional(false))
    39  	nontransitional := kind{"Nontransitional", p.ToASCII}
    40  
    41  	testCases := []struct { // rows are executed in order by the loop at the end
    42  		kind
    43  		input   string // string handed to the conversion function
    44  		want    string // expected result string
    45  		wantErr string // expected error code; "" presumably means no error (see doTest)
    46  	}{
    47  		{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
    48  		{lengthA, "", "", "A4"},
    49  
    50  		{lengthU, "xn--", "", "A4"},
    51  		{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
    52  		{lengthU, "xn--.foo", ".foo", "A4"},
    53  		{lengthU, "foo.xn--.bar", "foo..bar", "A4"},
    54  
    55  		{display, "xn--", "", ""},
    56  		{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
    57  		{display, "xn--.foo", ".foo", ""},
    58  		{display, "foo.xn--.bar", "foo..bar", ""},
    59  
    60  		{lengthA, "a..b", "a..b", "A4"},
    61  		{punyA, ".b", ".b", ""},
    62  		// For backwards compatibility, the Punycode profile does not map runes.
    63  		{punyA, "\u3002b", "xn--b-83t", ""},
    64  		{punyA, "..b", "..b", ""},
    65  		// Only strip leading empty labels for certain profiles. Stripping
    66  		// leading empty labels here but not for "empty" punycode above seems
    67  		// inconsistent, but seems to be applied by both the conformance test
    68  		// and Chrome. So we turn it off by default, support it as an option,
    69  		// and enable it in profiles where it seems commonplace.
    70  		{lengthA, ".b", "b", ""},
    71  		{lengthA, "\u3002b", "b", ""},
    72  		{lengthA, "..b", "b", ""},
    73  		{lengthA, "b..", "b..", ""},
    74  
    75  		{resolve, "a..b", "a..b", ""},
    76  		{resolve, ".b", "b", ""},
    77  		{resolve, "\u3002b", "b", ""},
    78  		{resolve, "..b", "b", ""},
    79  		{resolve, "b..", "b..", ""},
    80  		{resolve, "\xed", "", "P1"}, // lone 0xED byte: not valid UTF-8
    81  
    82  		// Raw punycode
    83  		{punyA, "", "", ""},
    84  		{punyA, "*.foo.com", "*.foo.com", ""},
    85  		{punyA, "Foo.com", "Foo.com", ""},
    86  
    87  		// STD3 rules
    88  		{display, "*.foo.com", "*.foo.com", "P1"},
    89  		{std3, "*.foo.com", "*.foo.com", ""},
    90  
    91  		// Hyphens
    92  		{display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"},
    93  		{hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""},
    94  		{display, "-label-.com", "-label-.com", "V3"},
    95  		{hyphens, "-label-.com", "-label-.com", ""},
    96  
    97  		// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
    98  		// Chrome, modern Firefox, Safari, and IE.
    99  		{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
   100  		{display, "lab⒐be", "lab⒐be", "P1"},
   101  		{resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
   102  		{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},
   103  
   104  		// Transitional vs Nontransitional processing
   105  		{transitional, "Plan9faß.de", "plan9fass.de", ""},
   106  		{nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""},
   107  
   108  		// Chrome 54.0 recognizes the error and treats this input verbatim as a
   109  		// search string.
   110  		// Safari 10.0 (not conforming to the spec) decomposes "⒈" and computes
   111  		// the punycode on the result using transitional mapping.
   112  		// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
   113  		// seems to be nested punycode encodings.
   114  		{resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
   115  		{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},
   116  
   117  		{resolve, "a\u200Cb", "ab", ""},
   118  		{display, "a\u200Cb", "a\u200Cb", "C"},
   119  
   120  		{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
   121  		{display, "a\u200Cb", "a\u200Cb", "C"}, // NOTE(review): identical to the display row two cases up; possibly meant encode("a\u200Cb") as input — confirm
   122  
   123  		{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
   124  		{
   125  			// Notice how the string gets transformed, even with an error.
   126  			// Chrome will use the original string if it finds an error, so not
   127  			// the transformed one.
   128  			display,
   129  			"gr\ufecb\ufeae\ufe91\ufef2.de",
   130  			"gr\u0639\u0631\u0628\u064a.de",
   131  			"B",
   132  		},
   133  
   134  		{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
   135  		{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},
   136  
   137  		// normalize input
   138  		{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
   139  		{display, "a\u0323\u0322", "\u1ea1\u0322", ""},
   140  
   141  		// Non-normalized strings are not normalized when they originate from
   142  		// punycode. Despite the error, Chrome, Safari and Firefox will attempt
   143  		// to look up the input punycode.
   144  		{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
   145  		{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
   146  	}
   147  
   148  	for _, tc := range testCases {
   149  		doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr) // doTest is defined outside this function
   150  	}
   151  }