golang.org/x/text@v0.14.0/internal/language/match_test.go

golang.org/x/text@v0.14.0/internal/language/match_test.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
     7  import (
     8  	"flag"
     9  	"testing"
    10  )
    11  
    12  var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
    13  
    14  func TestAddLikelySubtags(t *testing.T) {
    15  	tests := []struct{ in, out string }{
    16  		{"aa", "aa-Latn-ET"},
    17  		{"aa-Latn", "aa-Latn-ET"},
    18  		{"aa-Arab", "aa-Arab-ET"},
    19  		{"aa-Arab-ER", "aa-Arab-ER"},
    20  		{"kk", "kk-Cyrl-KZ"},
    21  		{"kk-CN", "kk-Arab-CN"},
    22  		{"cmn", "cmn"},
    23  		{"zh-AU", "zh-Hant-AU"},
    24  		{"zh-VN", "zh-Hant-VN"},
    25  		{"zh-SG", "zh-Hans-SG"},
    26  		{"zh-Hant", "zh-Hant-TW"},
    27  		{"zh-Hani", "zh-Hani-CN"},
    28  		{"und-Hani", "zh-Hani-CN"},
    29  		{"und", "en-Latn-US"},
    30  		{"und-GB", "en-Latn-GB"},
    31  		{"und-CW", "pap-Latn-CW"},
    32  		{"und-YT", "fr-Latn-YT"},
    33  		{"und-Arab", "ar-Arab-EG"},
    34  		{"und-AM", "hy-Armn-AM"},
    35  		{"und-TW", "zh-Hant-TW"},
    36  		{"und-002", "en-Latn-NG"},
    37  		{"und-Latn-002", "en-Latn-NG"},
    38  		{"en-Latn-002", "en-Latn-NG"},
    39  		{"en-002", "en-Latn-NG"},
    40  		{"en-001", "en-Latn-US"},
    41  		{"und-003", "en-Latn-US"},
    42  		{"und-GB", "en-Latn-GB"},
    43  		{"Latn-001", "en-Latn-US"},
    44  		{"en-001", "en-Latn-US"},
    45  		{"es-419", "es-Latn-419"},
    46  		{"he-145", "he-Hebr-IL"},
    47  		{"ky-145", "ky-Latn-TR"},
    48  		{"kk", "kk-Cyrl-KZ"},
    49  		// Don't specialize duplicate and ambiguous matches.
    50  		{"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
    51  		{"ku-145", "ku-Latn-TR"},  // Matches IQ, TR, and LB, but kk -> TR.
    52  		{"und-Arab-CC", "ms-Arab-CC"},
    53  		{"und-Arab-GB", "ks-Arab-GB"},
    54  		{"und-Hans-CC", "zh-Hans-CC"},
    55  		{"und-CC", "en-Latn-CC"},
    56  		{"sr", "sr-Cyrl-RS"},
    57  		{"sr-151", "sr-Latn-151"}, // Matches RO and RU.
    58  		// We would like addLikelySubtags to generate the same results if the input
    59  		// only changes by adding tags that would otherwise have been added
    60  		// by the expansion.
    61  		// In other words:
    62  		//     und-AA -> xx-Scrp-AA   implies und-Scrp-AA -> xx-Scrp-AA
    63  		//     und-AA -> xx-Scrp-AA   implies xx-AA -> xx-Scrp-AA
    64  		//     und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
    65  		//     und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
    66  		//     xx -> xx-Scrp-AA       implies xx-Scrp -> xx-Scrp-AA
    67  		//     xx -> xx-Scrp-AA       implies xx-AA -> xx-Scrp-AA
    68  		//
    69  		// The algorithm specified in
    70  		//   https://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
    71  		// Section C.10, does not handle the first case. For example,
    72  		// the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but not
    73  		// there is no rule for und-Latn-BJ.  According to spec, und-Latn-BJ
    74  		// would expand to en-Latn-BJ, violating the aforementioned principle.
    75  		// We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA
    76  		// if a rule of the form und-AA -> xx-Scrp-AA is defined.
    77  		// Note that as of version 23, CLDR has some explicitly specified
    78  		// entries that do not conform to these rules. The implementation
    79  		// will not correct these explicit inconsistencies. A later versions of CLDR
    80  		// is supposed to fix this.
    81  		{"und-Latn-BJ", "fr-Latn-BJ"},
    82  		{"und-Bugi-ID", "bug-Bugi-ID"},
    83  		// regions, scripts and languages without definitions
    84  		{"und-Arab-AA", "ar-Arab-AA"},
    85  		{"und-Afak-RE", "fr-Afak-RE"},
    86  		{"und-Arab-GB", "ks-Arab-GB"},
    87  		{"abp-Arab-GB", "abp-Arab-GB"},
    88  		// script has preference over region
    89  		{"und-Arab-NL", "ar-Arab-NL"},
    90  		{"zza", "zza-Latn-TR"},
    91  		// preserve variants and extensions
    92  		{"de-1901", "de-Latn-DE-1901"},
    93  		{"de-x-abc", "de-Latn-DE-x-abc"},
    94  		{"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
    95  		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
    96  	}
    97  	for i, tt := range tests {
    98  		in, _ := Parse(tt.in)
    99  		out, _ := Parse(tt.out)
   100  		in, _ = in.addLikelySubtags()
   101  		if in.String() != out.String() {
   102  			t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out)
   103  		}
   104  	}
   105  }
   106  func TestMinimize(t *testing.T) {
   107  	tests := []struct{ in, out string }{
   108  		{"aa", "aa"},
   109  		{"aa-Latn", "aa"},
   110  		{"aa-Latn-ET", "aa"},
   111  		{"aa-ET", "aa"},
   112  		{"aa-Arab", "aa-Arab"},
   113  		{"aa-Arab-ER", "aa-Arab-ER"},
   114  		{"aa-Arab-ET", "aa-Arab"},
   115  		{"und", "und"},
   116  		{"und-Latn", "und"},
   117  		{"und-Latn-US", "und"},
   118  		{"en-Latn-US", "en"},
   119  		{"cmn", "cmn"},
   120  		{"cmn-Hans", "cmn-Hans"},
   121  		{"cmn-Hant", "cmn-Hant"},
   122  		{"zh-AU", "zh-AU"},
   123  		{"zh-VN", "zh-VN"},
   124  		{"zh-SG", "zh-SG"},
   125  		{"zh-Hant", "zh-Hant"},
   126  		{"zh-Hant-TW", "zh-TW"},
   127  		{"zh-Hans", "zh"},
   128  		{"zh-Hani", "zh-Hani"},
   129  		{"und-Hans", "und-Hans"},
   130  		{"und-Hani", "und-Hani"},
   131  
   132  		{"und-CW", "und-CW"},
   133  		{"und-YT", "und-YT"},
   134  		{"und-Arab", "und-Arab"},
   135  		{"und-AM", "und-AM"},
   136  		{"und-Arab-CC", "und-Arab-CC"},
   137  		{"und-CC", "und-CC"},
   138  		{"und-Latn-BJ", "und-BJ"},
   139  		{"und-Bugi-ID", "und-Bugi"},
   140  		{"bug-Bugi-ID", "bug-Bugi"},
   141  		// regions, scripts and languages without definitions
   142  		{"und-Arab-AA", "und-Arab-AA"},
   143  		// preserve variants and extensions
   144  		{"de-Latn-1901", "de-1901"},
   145  		{"de-Latn-x-abc", "de-x-abc"},
   146  		{"de-DE-1901-x-abc", "de-1901-x-abc"},
   147  		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
   148  	}
   149  	for i, tt := range tests {
   150  		in, _ := Parse(tt.in)
   151  		out, _ := Parse(tt.out)
   152  		min, _ := in.minimize()
   153  		if min.String() != out.String() {
   154  			t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out)
   155  		}
   156  		max, _ := min.addLikelySubtags()
   157  		if x, _ := in.addLikelySubtags(); x.String() != max.String() {
   158  			t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x)
   159  		}
   160  	}
   161  }