github.com/go-xe2/third@v1.0.3/golang.org/x/text/internal/export/idna/idna_test.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package idna
     6  
     7  import (
     8  	"fmt"
     9  	"strconv"
    10  	"strings"
    11  	"testing"
    12  
    13  	"github.com/go-xe2/third/golang.org/x/text/internal/gen"
    14  	"github.com/go-xe2/third/golang.org/x/text/internal/testtext"
    15  	"github.com/go-xe2/third/golang.org/x/text/internal/ucd"
    16  )
    17  
    18  func TestAllocToUnicode(t *testing.T) {
    19  	avg := testtext.AllocsPerRun(1000, func() {
    20  		ToUnicode("www.golang.org")
    21  	})
    22  	if avg > 0 {
    23  		t.Errorf("got %f; want 0", avg)
    24  	}
    25  }
    26  
    27  func TestAllocToASCII(t *testing.T) {
    28  	avg := testtext.AllocsPerRun(1000, func() {
    29  		ToASCII("www.golang.org")
    30  	})
    31  	if avg > 0 {
    32  		t.Errorf("got %f; want 0", avg)
    33  	}
    34  }
    35  
    36  func TestProfiles(t *testing.T) {
    37  	testCases := []struct {
    38  		name      string
    39  		want, got *Profile
    40  	}{
    41  		{"Punycode", punycode, New()},
    42  		{"Registration", registration, New(ValidateForRegistration())},
    43  		{"Registration", registration, New(
    44  			ValidateForRegistration(),
    45  			VerifyDNSLength(true),
    46  			BidiRule(),
    47  		)},
    48  		{"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))},
    49  		{"Display", display, New(MapForLookup(), BidiRule())},
    50  	}
    51  	for _, tc := range testCases {
    52  		// Functions are not comparable, but the printed version will include
    53  		// their pointers.
    54  		got := fmt.Sprintf("%#v", tc.got)
    55  		want := fmt.Sprintf("%#v", tc.want)
    56  		if got != want {
    57  			t.Errorf("%s: \ngot  %#v,\nwant %#v", tc.name, got, want)
    58  		}
    59  	}
    60  }
    61  
    62  // doTest performs a single test f(input) and verifies that the output matches
    63  // out and that the returned error is expected. The errors string contains
    64  // all allowed error codes as categorized in
    65  // http://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt:
    66  // P: Processing
    67  // V: Validity
    68  // A: to ASCII
    69  // B: Bidi
    70  // C: Context J
    71  func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) {
    72  	errors = strings.Trim(errors, "[]")
    73  	test := "ok"
    74  	if errors != "" {
    75  		test = "err:" + errors
    76  	}
    77  	// Replace some of the escape sequences to make it easier to single out
    78  	// tests on the command name.
    79  	in := strings.Trim(strconv.QuoteToASCII(input), `"`)
    80  	in = strings.Replace(in, `\u`, "#", -1)
    81  	in = strings.Replace(in, `\U`, "#", -1)
    82  	name = fmt.Sprintf("%s/%s/%s", name, in, test)
    83  
    84  	testtext.Run(t, name, func(t *testing.T) {
    85  		got, err := f(input)
    86  
    87  		if err != nil {
    88  			code := err.(interface {
    89  				code() string
    90  			}).code()
    91  			if strings.Index(errors, code) == -1 {
    92  				t.Errorf("error %q not in set of expected errors {%v}", code, errors)
    93  			}
    94  		} else if errors != "" {
    95  			t.Errorf("no errors; want error in {%v}", errors)
    96  		}
    97  
    98  		if want != "" && got != want {
    99  			t.Errorf(`string: got %+q; want %+q`, got, want)
   100  		}
   101  	})
   102  }
   103  
// TestLabelErrors tests strings returned in case of error. All results should
// be identical to the reference implementation and can be verified at
// http://unicode.org/cldr/utility/idna.jsp. The reference implementation,
// however, seems to not display Bidi and ContextJ errors.
//
// In some cases the behavior of browsers is added as a comment. In all cases,
// whenever a resolve search returns an error here, Chrome will treat the input
// string as a search string (including those for Bidi and Context J errors),
// unless noted otherwise.
//
// Each table row names the conversion to run, the input, the wanted output
// and the allowed error codes; rows are executed through doTest.
func TestLabelErrors(t *testing.T) {
	// Local shorthand around the package-level encode that applies acePrefix
	// and discards the returned error.
	encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
	// kind pairs a subtest name prefix with the conversion function under test.
	type kind struct {
		name string
		f    func(string) (string, error)
	}
	punyA := kind{"PunycodeA", punycode.ToASCII}
	resolve := kind{"ResolveA", Lookup.ToASCII}
	display := kind{"ToUnicode", Display.ToUnicode}
	// Profile with DNS length verification enabled, used by the length checks.
	p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
	lengthU := kind{"CheckLengthU", p.ToUnicode}
	lengthA := kind{"CheckLengthA", p.ToASCII}
	// Profile constructed with StrictDomainName(false), used by the STD3 row.
	p = New(MapForLookup(), StrictDomainName(false))
	std3 := kind{"STD3", p.ToASCII}

	testCases := []struct {
		kind
		input   string
		want    string
		wantErr string
	}{
		{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
		{lengthA, "", "", "A4"},

		{lengthU, "xn--", "", "A4"},
		{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
		{lengthU, "xn--.foo", ".foo", "A4"},
		{lengthU, "foo.xn--.bar", "foo..bar", "A4"},

		{display, "xn--", "", ""},
		{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
		{display, "xn--.foo", ".foo", ""},
		{display, "foo.xn--.bar", "foo..bar", ""},

		{lengthA, "a..b", "a..b", "A4"},
		{punyA, ".b", ".b", ""},
		// For backwards compatibility, the Punycode profile does not map runes.
		{punyA, "\u3002b", "xn--b-83t", ""},
		{punyA, "..b", "..b", ""},

		{lengthA, ".b", ".b", "A4"},
		{lengthA, "\u3002b", ".b", "A4"},
		{lengthA, "..b", "..b", "A4"},
		{lengthA, "b..", "b..", ""},

		// Sharpened Bidi rules for Unicode 10.0.0. Apply for ALL labels if ANY
		// of the labels is RTL.
		{lengthA, "\ufe05\u3002\u3002\U0002603e\u1ce0", "..xn--t6f5138v", "A4"},
		{lengthA, "FAX\u2a77\U0001d186\u3002\U0001e942\U000e0181\u180c", "", "B6"},

		{resolve, "a..b", "a..b", ""},
		// Note that leading dots are not stripped. This is to be consistent
		// with the Punycode profile as well as the conformance test.
		{resolve, ".b", ".b", ""},
		{resolve, "\u3002b", ".b", ""},
		{resolve, "..b", "..b", ""},
		{resolve, "b..", "b..", ""},
		{resolve, "\xed", "", "P1"},

		// Raw punycode
		{punyA, "", "", ""},
		{punyA, "*.foo.com", "*.foo.com", ""},
		{punyA, "Foo.com", "Foo.com", ""},

		// STD3 rules
		{display, "*.foo.com", "*.foo.com", "P1"},
		{std3, "*.foo.com", "*.foo.com", ""},

		// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
		// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
		// lab9.be.
		{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
		{display, "lab⒐be", "lab⒐be", "P1"},

		{resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
		{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},

		// Chrome 54.0 recognizes the error and treats this input verbatim as a
		// search string.
		// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the
		// punycode on the result using transitional mapping.
		// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
		// seems to be nested punycode encodings.
		{resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
		{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},

		{resolve, "a\u200Cb", "ab", ""},
		{display, "a\u200Cb", "a\u200Cb", "C"},

		{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
		// NOTE(review): this row is identical to the display row two entries
		// above; possibly the input was meant to be encode("a\u200Cb") — confirm.
		{display, "a\u200Cb", "a\u200Cb", "C"},

		{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
		{
			// Notice how the string gets transformed, even with an error.
			// Chrome will use the original string if it finds an error, so not
			// the transformed one.
			display,
			"gr\ufecb\ufeae\ufe91\ufef2.de",
			"gr\u0639\u0631\u0628\u064a.de",
			"B",
		},

		{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
		{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},

		// normalize input
		{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
		{display, "a\u0323\u0322", "\u1ea1\u0322", ""},

		// Non-normalized strings are not normalized when they originate from
		// punycode. Despite the error, Chrome, Safari and Firefox will attempt
		// to look up the input punycode.
		{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
		{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
	}

	// tc.f and tc.name are promoted from the embedded kind.
	for _, tc := range testCases {
		doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
	}
}
   234  
   235  func TestConformance(t *testing.T) {
   236  	testtext.SkipIfNotLong(t)
   237  
   238  	r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt")
   239  	defer r.Close()
   240  
   241  	section := "main"
   242  	started := false
   243  	p := ucd.New(r, ucd.CommentHandler(func(s string) {
   244  		if started {
   245  			section = strings.ToLower(strings.Split(s, " ")[0])
   246  		}
   247  	}))
   248  	transitional := New(Transitional(true), VerifyDNSLength(true), BidiRule(), MapForLookup())
   249  	nonTransitional := New(VerifyDNSLength(true), BidiRule(), MapForLookup())
   250  	for p.Next() {
   251  		started = true
   252  
   253  		// What to test
   254  		profiles := []*Profile{}
   255  		switch p.String(0) {
   256  		case "T":
   257  			profiles = append(profiles, transitional)
   258  		case "N":
   259  			profiles = append(profiles, nonTransitional)
   260  		case "B":
   261  			profiles = append(profiles, transitional)
   262  			profiles = append(profiles, nonTransitional)
   263  		}
   264  
   265  		src := unescape(p.String(1))
   266  
   267  		wantToUnicode := unescape(p.String(2))
   268  		if wantToUnicode == "" {
   269  			wantToUnicode = src
   270  		}
   271  		wantToASCII := unescape(p.String(3))
   272  		if wantToASCII == "" {
   273  			wantToASCII = wantToUnicode
   274  		}
   275  		wantErrToUnicode := ""
   276  		if strings.HasPrefix(wantToUnicode, "[") {
   277  			wantErrToUnicode = wantToUnicode
   278  			wantToUnicode = ""
   279  		}
   280  		wantErrToASCII := ""
   281  		if strings.HasPrefix(wantToASCII, "[") {
   282  			wantErrToASCII = wantToASCII
   283  			wantToASCII = ""
   284  		}
   285  
   286  		// TODO: also do IDNA tests.
   287  		// invalidInIDNA2008 := p.String(4) == "NV8"
   288  
   289  		for _, p := range profiles {
   290  			name := fmt.Sprintf("%s:%s", section, p)
   291  			doTest(t, p.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode)
   292  			doTest(t, p.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII)
   293  		}
   294  	}
   295  }
   296  
   297  func unescape(s string) string {
   298  	s, err := strconv.Unquote(`"` + s + `"`)
   299  	if err != nil {
   300  		panic(err)
   301  	}
   302  	return s
   303  }
   304  
   305  func BenchmarkProfile(b *testing.B) {
   306  	for i := 0; i < b.N; i++ {
   307  		Lookup.ToASCII("www.yahoogle.com")
   308  	}
   309  }