github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/language/data_test.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
     7  type matchTest struct {
     8  	comment   string
     9  	supported string
    10  	test      []struct{ match, desired string }
    11  }
    12  
    13  var matchTests = []matchTest{
    14  	{
    15  		"basics",
    16  		"fr, en-GB, en",
    17  		[]struct{ match, desired string }{
    18  			{"en-GB", "en-GB"},
    19  			{"en", "en-US"},
    20  			{"fr", "fr-FR"},
    21  			{"fr", "ja-JP"},
    22  		},
    23  	},
    24  	{
    25  		"script fallbacks",
    26  		"zh-CN, zh-TW, iw",
    27  		[]struct{ match, desired string }{
    28  			{"zh-TW", "zh-Hant"},
    29  			{"zh-CN", "zh"},
    30  			{"zh-CN", "zh-Hans-CN"},
    31  			{"zh-TW", "zh-Hant-HK"},
    32  			{"iw", "he-IT"},
    33  		},
    34  	},
    35  	{
    36  		"language-specific script fallbacks 1",
    37  		"en, sr, nl",
    38  		[]struct{ match, desired string }{
    39  			{"sr", "sr-Latn"},
    40  			{"en", "sh"},
    41  			{"en", "hr"},
    42  			{"en", "bs"},
    43  			{"en", "nl-Cyrl"},
    44  		},
    45  	},
    46  	{
    47  		"language-specific script fallbacks 2",
    48  		"en, sh",
    49  		[]struct{ match, desired string }{
    50  			{"sh", "sr"},
    51  			{"sh", "sr-Cyrl"},
    52  			{"sh", "hr"},
    53  		},
    54  	},
    55  	{
    56  		"both deprecated and not",
    57  		"fil, tl, iw, he",
    58  		[]struct{ match, desired string }{
    59  			{"he", "he-IT"},
    60  			{"he", "he"},
    61  			{"iw", "iw"},
    62  			{"fil", "fil-IT"},
    63  			{"fil", "fil"},
    64  			{"tl", "tl"},
    65  		},
    66  	},
    67  	{
    68  		"nearby languages",
    69  		"en, fil, ro, nn",
    70  		[]struct{ match, desired string }{
    71  			{"fil", "tl"},
    72  			{"ro", "mo"},
    73  			{"nn", "nb"},
    74  			{"en", "ja"}, // make sure default works
    75  		},
    76  	},
    77  	{
    78  		"nearby languages: Nynorsk to Bokmål",
    79  		"en, nb",
    80  		[]struct{ match, desired string }{
    81  			{"nb", "nn"},
    82  		},
    83  	},
    84  	{
    85  		"nearby languages: Danish does not match nn",
    86  		"en, nn",
    87  		[]struct{ match, desired string }{
    88  			{"en", "da"},
    89  		},
    90  	},
    91  	{
    92  		"nearby languages: Danish matches no",
    93  		"en, no",
    94  		[]struct{ match, desired string }{
    95  			{"no", "da"},
    96  		},
    97  	},
    98  	{
    99  		"nearby languages: Danish matches nb",
   100  		"en, nb",
   101  		[]struct{ match, desired string }{
   102  			{"nb", "da"},
   103  		},
   104  	},
   105  	{
   106  		"prefer matching languages over language variants.",
   107  		"nn, en-GB",
   108  		[]struct{ match, desired string }{
   109  			{"en-GB", "no, en-US"},
   110  			{"en-GB", "nb, en-US"},
   111  		},
   112  	},
   113  	{
   114  		"deprecated version is closer than same language with other differences",
   115  		"nl, he, en-GB",
   116  		[]struct{ match, desired string }{
   117  			{"he", "iw, en-US"},
   118  		},
   119  	},
   120  	{
   121  		"macro equivalent is closer than same language with other differences",
   122  		"nl, zh, en-GB, no",
   123  		[]struct{ match, desired string }{
   124  			{"zh", "cmn, en-US"},
   125  			{"no", "nb, en-US"},
   126  		},
   127  	},
   128  	{
   129  		"legacy equivalent is closer than same language with other differences",
   130  		"nl, fil, en-GB",
   131  		[]struct{ match, desired string }{
   132  			{"fil", "tl, en-US"},
   133  		},
   134  	},
   135  	{
   136  		"exact over equivalent",
   137  		"en, ro, mo, ro-MD",
   138  		[]struct{ match, desired string }{
   139  			{"ro", "ro"},
   140  			{"mo", "mo"},
   141  			{"ro-MD", "ro-MD"},
   142  		},
   143  	},
   144  	{
   145  		"maximization of legacy",
   146  		"sr-Cyrl, sr-Latn, ro, ro-MD",
   147  		[]struct{ match, desired string }{
   148  			{"sr-Latn", "sh"},
   149  			{"ro-MD", "mo"},
   150  		},
   151  	},
   152  	{
   153  		"empty",
   154  		"",
   155  		[]struct{ match, desired string }{
   156  			{"und", "fr"},
   157  			{"und", "en"},
   158  		},
   159  	},
   160  	{
   161  		"private use subtags",
   162  		"fr, en-GB, x-bork, es-ES, es-419",
   163  		[]struct{ match, desired string }{
   164  			{"fr", "x-piglatin"},
   165  			{"x-bork", "x-bork"},
   166  		},
   167  	},
   168  	{
   169  		"grandfathered codes",
   170  		"fr, i-klingon, en-Latn-US",
   171  		[]struct{ match, desired string }{
   172  			{"en-Latn-US", "en-GB-oed"},
   173  			{"tlh", "i-klingon"},
   174  		},
   175  	},
   176  	{
   177  		"exact match",
   178  		"fr, en-GB, ja, es-ES, es-MX",
   179  		[]struct{ match, desired string }{
   180  			{"ja", "ja, de"},
   181  		},
   182  	},
   183  	{
   184  		"simple variant match",
   185  		"fr, en-GB, ja, es-ES, es-MX",
   186  		[]struct{ match, desired string }{
   187  			// Intentionally avoiding a perfect-match or two candidates for variant matches.
   188  			{"en-GB", "de, en-US"},
   189  			// Fall back.
   190  			{"fr", "de, zh"},
   191  		},
   192  	},
   193  	{
   194  		"best match for traditional Chinese",
   195  		// Scenario: An application that only supports Simplified Chinese (and some
   196  		// other languages), but does not support Traditional Chinese. zh-Hans-CN
   197  		// could be replaced with zh-CN, zh, or zh-Hans, it wouldn't make much of
   198  		// a difference.
   199  		"fr, zh-Hans-CN, en-US",
   200  		[]struct{ match, desired string }{
   201  			{"zh-Hans-CN", "zh-TW"},
   202  			{"zh-Hans-CN", "zh-Hant"},
   203  			// One can avoid a zh-Hant to zh-Hans match by including a second language
   204  			// preference which is a better match.
   205  			{"en-US", "zh-TW, en"},
   206  			{"en-US", "zh-Hant-CN, en"},
   207  			{"zh-Hans-CN", "zh-Hans, en"},
   208  		},
   209  	},
   210  	// More specific region and script tie-breakers.
   211  	{
   212  		"more specific script should win in case regions are identical",
   213  		"af, af-Latn, af-Arab",
   214  		[]struct{ match, desired string }{
   215  			{"af", "af"},
   216  			{"af", "af-ZA"},
   217  			{"af-Latn", "af-Latn-ZA"},
   218  			{"af-Latn", "af-Latn"},
   219  		},
   220  	},
   221  	{
   222  		"more specific region should win",
   223  		"nl, nl-NL, nl-BE",
   224  		[]struct{ match, desired string }{
   225  			{"nl", "nl"},
   226  			{"nl", "nl-Latn"},
   227  			{"nl-NL", "nl-Latn-NL"},
   228  			{"nl-NL", "nl-NL"},
   229  		},
   230  	},
   231  	{
   232  		"more specific region wins over more specific script",
   233  		"nl, nl-Latn, nl-NL, nl-BE",
   234  		[]struct{ match, desired string }{
   235  			{"nl", "nl"},
   236  			{"nl-Latn", "nl-Latn"},
   237  			{"nl-NL", "nl-NL"},
   238  			{"nl-NL", "nl-Latn-NL"},
   239  		},
   240  	},
   241  	// Region distance tie-breakers.
   242  	{
   243  		"region distance Portuguese",
   244  		"pt, pt-PT",
   245  		[]struct{ match, desired string }{
   246  			{"pt-PT", "pt-ES"},
   247  		},
   248  	},
   249  	{
   250  		"region distance French",
   251  		"en, fr, fr-CA, fr-CH",
   252  		[]struct{ match, desired string }{
   253  			{"fr-CA", "fr-US"},
   254  		},
   255  	},
   256  	{
   257  		"region distance German",
   258  		"de-AT, de-DE, de-CH",
   259  		[]struct{ match, desired string }{
   260  			{"de-DE", "de"},
   261  		},
   262  	},
   263  	{
   264  		"en-AU is closer to en-GB than to en (which is en-US)",
   265  		"en, en-GB, es-ES, es-419",
   266  		[]struct{ match, desired string }{
   267  			{"en-GB", "en-AU"},
   268  			{"es-419", "es-MX"},
   269  			{"es-ES", "es-PT"},
   270  		},
   271  	},
   272  	// Test exceptions with "und".
   273  	// When the undefined language doesn't match anything in the list, return the default, as usual.
   274  	// max("und") = "en-Latn-US", and since matching is based on maximized tags, the undefined
   275  	// language would normally match English.  But that would produce the counterintuitive results.
   276  	// Matching "und" to "it,en" would be "en" matching "en" to "it,und" would be "und".
   277  	// To avoid this max("und") is defined as "und"
   278  	{
   279  		"undefined",
   280  		"it, fr",
   281  		[]struct{ match, desired string }{
   282  			{"it", "und"},
   283  		},
   284  	},
   285  	{
   286  		"und does not match en",
   287  		"it, en",
   288  		[]struct{ match, desired string }{
   289  			{"it", "und"},
   290  		},
   291  	},
   292  	{
   293  		"undefined in priority list",
   294  		"it, und",
   295  		[]struct{ match, desired string }{
   296  			{"und", "und"},
   297  			{"it", "en"},
   298  		},
   299  	},
   300  	// Undefined scripts and regions.
   301  	{
   302  		"undefined",
   303  		"it, fr, zh",
   304  		[]struct{ match, desired string }{
   305  			{"fr", "und-FR"},
   306  			{"zh", "und-CN"},
   307  			{"zh", "und-Hans"},
   308  			{"zh", "und-Hant"},
   309  			{"it", "und-Latn"},
   310  		},
   311  	},
   312  	// Early termination conditions: do not consider all desired strings if
   313  	// a match is good enough.
   314  	{
   315  		"match on maximized tag",
   316  		"fr, en-GB, ja, es-ES, es-MX",
   317  		[]struct{ match, desired string }{
   318  			// ja-JP matches ja on likely subtags, and it's listed first,
   319  			// thus it wins over the second preference en-GB.
   320  			{"ja", "ja-JP, en-GB"},
   321  			{"ja", "ja-Jpan-JP, en-GB"},
   322  		},
   323  	},
   324  	{
   325  		"pick best maximized tag",
   326  		"ja, ja-Jpan-US, ja-JP, en, ru",
   327  		[]struct{ match, desired string }{
   328  			{"ja", "ja-Jpan, ru"},
   329  			{"ja-JP", "ja-JP, ru"},
   330  			{"ja-Jpan-US", "ja-US, ru"},
   331  		},
   332  	},
   333  	{
   334  		"termination: pick best maximized match",
   335  		"ja, ja-Jpan, ja-JP, en, ru",
   336  		[]struct{ match, desired string }{
   337  			{"ja-JP", "ja-Jpan-JP, ru"},
   338  			{"ja-Jpan", "ja-Jpan, ru"},
   339  		},
   340  	},
   341  	{
   342  		"no match on maximized",
   343  		"en, de, fr, ja",
   344  		[]struct{ match, desired string }{
   345  			// de maximizes to de-DE.
   346  			// Pick the exact match for the secondary language instead.
   347  			{"fr", "de-CH, fr"},
   348  		},
   349  	},
   350  
   351  	// Test that the CLDR parent relations are correctly preserved by the matcher.
   352  	// These matches may change for different CLDR versions.
   353  	{
   354  		"parent relation preserved",
   355  		"en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh,  zh-Hant, zh-Hant-HK",
   356  		[]struct{ match, desired string }{
   357  			{"en-GB", "en-150"},
   358  			{"en-GB", "en-AU"},
   359  			{"en-GB", "en-BE"},
   360  			{"en-GB", "en-GG"},
   361  			{"en-GB", "en-GI"},
   362  			{"en-GB", "en-HK"},
   363  			{"en-GB", "en-IE"},
   364  			{"en-GB", "en-IM"},
   365  			{"en-GB", "en-IN"},
   366  			{"en-GB", "en-JE"},
   367  			{"en-GB", "en-MT"},
   368  			{"en-GB", "en-NZ"},
   369  			{"en-GB", "en-PK"},
   370  			{"en-GB", "en-SG"},
   371  			{"en-GB", "en-DE"},
   372  			{"en-GB", "en-MT"},
   373  			{"es-419", "es-AR"},
   374  			{"es-419", "es-BO"},
   375  			{"es-419", "es-CL"},
   376  			{"es-419", "es-CO"},
   377  			{"es-419", "es-CR"},
   378  			{"es-419", "es-CU"},
   379  			{"es-419", "es-DO"},
   380  			{"es-419", "es-EC"},
   381  			{"es-419", "es-GT"},
   382  			{"es-419", "es-HN"},
   383  			{"es-419", "es-MX"},
   384  			{"es-419", "es-NI"},
   385  			{"es-419", "es-PA"},
   386  			{"es-419", "es-PE"},
   387  			{"es-419", "es-PR"},
   388  			{"es-419", "es-PY"},
   389  			{"es-419", "es-SV"},
   390  			{"es-419", "es-US"},
   391  			{"es-419", "es-UY"},
   392  			{"es-419", "es-VE"},
   393  			{"pt-PT", "pt-AO"},
   394  			{"pt-PT", "pt-CV"},
   395  			{"pt-PT", "pt-GW"},
   396  			{"pt-PT", "pt-MO"},
   397  			{"pt-PT", "pt-MZ"},
   398  			{"pt-PT", "pt-ST"},
   399  			{"pt-PT", "pt-TL"},
   400  			// TODO for CLDR 24+
   401  			// - en-001
   402  			// - {"zh-Hant-HK", "zh-Hant-MO"},
   403  		},
   404  	},
   405  	// Options and variants are inherited from user-defined settings.
   406  	{
   407  		"preserve Unicode extension",
   408  		"en, de, sl-nedis",
   409  		[]struct{ match, desired string }{
   410  			{"de-u-co-phonebk", "de-FR-u-co-phonebk"},
   411  			{"sl-nedis-u-cu-eur", "sl-nedis-u-cu-eur"},
   412  			{"sl-nedis-u-cu-eur", "sl-u-cu-eur"},
   413  			{"sl-nedis-u-cu-eur", "sl-HR-nedis-u-cu-eur"},
   414  		},
   415  	},
   416  }