golang.org/x/text@v0.14.0/internal/language/match_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import ( 8 "flag" 9 "testing" 10 ) 11 12 var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers") 13 14 func TestAddLikelySubtags(t *testing.T) { 15 tests := []struct{ in, out string }{ 16 {"aa", "aa-Latn-ET"}, 17 {"aa-Latn", "aa-Latn-ET"}, 18 {"aa-Arab", "aa-Arab-ET"}, 19 {"aa-Arab-ER", "aa-Arab-ER"}, 20 {"kk", "kk-Cyrl-KZ"}, 21 {"kk-CN", "kk-Arab-CN"}, 22 {"cmn", "cmn"}, 23 {"zh-AU", "zh-Hant-AU"}, 24 {"zh-VN", "zh-Hant-VN"}, 25 {"zh-SG", "zh-Hans-SG"}, 26 {"zh-Hant", "zh-Hant-TW"}, 27 {"zh-Hani", "zh-Hani-CN"}, 28 {"und-Hani", "zh-Hani-CN"}, 29 {"und", "en-Latn-US"}, 30 {"und-GB", "en-Latn-GB"}, 31 {"und-CW", "pap-Latn-CW"}, 32 {"und-YT", "fr-Latn-YT"}, 33 {"und-Arab", "ar-Arab-EG"}, 34 {"und-AM", "hy-Armn-AM"}, 35 {"und-TW", "zh-Hant-TW"}, 36 {"und-002", "en-Latn-NG"}, 37 {"und-Latn-002", "en-Latn-NG"}, 38 {"en-Latn-002", "en-Latn-NG"}, 39 {"en-002", "en-Latn-NG"}, 40 {"en-001", "en-Latn-US"}, 41 {"und-003", "en-Latn-US"}, 42 {"und-GB", "en-Latn-GB"}, 43 {"Latn-001", "en-Latn-US"}, 44 {"en-001", "en-Latn-US"}, 45 {"es-419", "es-Latn-419"}, 46 {"he-145", "he-Hebr-IL"}, 47 {"ky-145", "ky-Latn-TR"}, 48 {"kk", "kk-Cyrl-KZ"}, 49 // Don't specialize duplicate and ambiguous matches. 50 {"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab. 51 {"ku-145", "ku-Latn-TR"}, // Matches IQ, TR, and LB, but kk -> TR. 52 {"und-Arab-CC", "ms-Arab-CC"}, 53 {"und-Arab-GB", "ks-Arab-GB"}, 54 {"und-Hans-CC", "zh-Hans-CC"}, 55 {"und-CC", "en-Latn-CC"}, 56 {"sr", "sr-Cyrl-RS"}, 57 {"sr-151", "sr-Latn-151"}, // Matches RO and RU. 58 // We would like addLikelySubtags to generate the same results if the input 59 // only changes by adding tags that would otherwise have been added 60 // by the expansion. 61 // In other words: 62 // und-AA -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA 63 // und-AA -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA 64 // und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA 65 // und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA 66 // xx -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA 67 // xx -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA 68 // 69 // The algorithm specified in 70 // https://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data, 71 // Section C.10, does not handle the first case. For example, 72 // the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but not 73 // there is no rule for und-Latn-BJ. According to spec, und-Latn-BJ 74 // would expand to en-Latn-BJ, violating the aforementioned principle. 75 // We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA 76 // if a rule of the form und-AA -> xx-Scrp-AA is defined. 77 // Note that as of version 23, CLDR has some explicitly specified 78 // entries that do not conform to these rules. The implementation 79 // will not correct these explicit inconsistencies. A later versions of CLDR 80 // is supposed to fix this. 81 {"und-Latn-BJ", "fr-Latn-BJ"}, 82 {"und-Bugi-ID", "bug-Bugi-ID"}, 83 // regions, scripts and languages without definitions 84 {"und-Arab-AA", "ar-Arab-AA"}, 85 {"und-Afak-RE", "fr-Afak-RE"}, 86 {"und-Arab-GB", "ks-Arab-GB"}, 87 {"abp-Arab-GB", "abp-Arab-GB"}, 88 // script has preference over region 89 {"und-Arab-NL", "ar-Arab-NL"}, 90 {"zza", "zza-Latn-TR"}, 91 // preserve variants and extensions 92 {"de-1901", "de-Latn-DE-1901"}, 93 {"de-x-abc", "de-Latn-DE-x-abc"}, 94 {"de-1901-x-abc", "de-Latn-DE-1901-x-abc"}, 95 {"x-abc", "x-abc"}, // TODO: is this the desired behavior? 96 } 97 for i, tt := range tests { 98 in, _ := Parse(tt.in) 99 out, _ := Parse(tt.out) 100 in, _ = in.addLikelySubtags() 101 if in.String() != out.String() { 102 t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out) 103 } 104 } 105 } 106 func TestMinimize(t *testing.T) { 107 tests := []struct{ in, out string }{ 108 {"aa", "aa"}, 109 {"aa-Latn", "aa"}, 110 {"aa-Latn-ET", "aa"}, 111 {"aa-ET", "aa"}, 112 {"aa-Arab", "aa-Arab"}, 113 {"aa-Arab-ER", "aa-Arab-ER"}, 114 {"aa-Arab-ET", "aa-Arab"}, 115 {"und", "und"}, 116 {"und-Latn", "und"}, 117 {"und-Latn-US", "und"}, 118 {"en-Latn-US", "en"}, 119 {"cmn", "cmn"}, 120 {"cmn-Hans", "cmn-Hans"}, 121 {"cmn-Hant", "cmn-Hant"}, 122 {"zh-AU", "zh-AU"}, 123 {"zh-VN", "zh-VN"}, 124 {"zh-SG", "zh-SG"}, 125 {"zh-Hant", "zh-Hant"}, 126 {"zh-Hant-TW", "zh-TW"}, 127 {"zh-Hans", "zh"}, 128 {"zh-Hani", "zh-Hani"}, 129 {"und-Hans", "und-Hans"}, 130 {"und-Hani", "und-Hani"}, 131 132 {"und-CW", "und-CW"}, 133 {"und-YT", "und-YT"}, 134 {"und-Arab", "und-Arab"}, 135 {"und-AM", "und-AM"}, 136 {"und-Arab-CC", "und-Arab-CC"}, 137 {"und-CC", "und-CC"}, 138 {"und-Latn-BJ", "und-BJ"}, 139 {"und-Bugi-ID", "und-Bugi"}, 140 {"bug-Bugi-ID", "bug-Bugi"}, 141 // regions, scripts and languages without definitions 142 {"und-Arab-AA", "und-Arab-AA"}, 143 // preserve variants and extensions 144 {"de-Latn-1901", "de-1901"}, 145 {"de-Latn-x-abc", "de-x-abc"}, 146 {"de-DE-1901-x-abc", "de-1901-x-abc"}, 147 {"x-abc", "x-abc"}, // TODO: is this the desired behavior? 148 } 149 for i, tt := range tests { 150 in, _ := Parse(tt.in) 151 out, _ := Parse(tt.out) 152 min, _ := in.minimize() 153 if min.String() != out.String() { 154 t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out) 155 } 156 max, _ := min.addLikelySubtags() 157 if x, _ := in.addLikelySubtags(); x.String() != max.String() { 158 t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x) 159 } 160 } 161 }