github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/language/lookup.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package language 6 7 import ( 8 "bytes" 9 "fmt" 10 "sort" 11 "strconv" 12 13 "golang.org/x/text/internal/tag" 14 ) 15 16 // findIndex tries to find the given tag in idx and returns a standardized error 17 // if it could not be found. 18 func findIndex(idx tag.Index, key []byte, form string) (index int, err error) { 19 if !tag.FixCase(form, key) { 20 return 0, errSyntax 21 } 22 i := idx.Index(key) 23 if i == -1 { 24 return 0, mkErrInvalid(key) 25 } 26 return i, nil 27 } 28 29 func searchUint(imap []uint16, key uint16) int { 30 return sort.Search(len(imap), func(i int) bool { 31 return imap[i] >= key 32 }) 33 } 34 35 type langID uint16 36 37 // getLangID returns the langID of s if s is a canonical subtag 38 // or langUnknown if s is not a canonical subtag. 39 func getLangID(s []byte) (langID, error) { 40 if len(s) == 2 { 41 return getLangISO2(s) 42 } 43 return getLangISO3(s) 44 } 45 46 // mapLang returns the mapped langID of id according to mapping m. 47 func normLang(id langID) (langID, langAliasType) { 48 k := sort.Search(len(langAliasMap), func(i int) bool { 49 return langAliasMap[i].from >= uint16(id) 50 }) 51 if k < len(langAliasMap) && langAliasMap[k].from == uint16(id) { 52 return langID(langAliasMap[k].to), langAliasTypes[k] 53 } 54 return id, langAliasTypeUnknown 55 } 56 57 // getLangISO2 returns the langID for the given 2-letter ISO language code 58 // or unknownLang if this does not exist. 59 func getLangISO2(s []byte) (langID, error) { 60 if !tag.FixCase("zz", s) { 61 return 0, errSyntax 62 } 63 if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 { 64 return langID(i), nil 65 } 66 return 0, mkErrInvalid(s) 67 } 68 69 const base = 'z' - 'a' + 1 70 71 func strToInt(s []byte) uint { 72 v := uint(0) 73 for i := 0; i < len(s); i++ { 74 v *= base 75 v += uint(s[i] - 'a') 76 } 77 return v 78 } 79 80 // converts the given integer to the original ASCII string passed to strToInt. 81 // len(s) must match the number of characters obtained. 82 func intToStr(v uint, s []byte) { 83 for i := len(s) - 1; i >= 0; i-- { 84 s[i] = byte(v%base) + 'a' 85 v /= base 86 } 87 } 88 89 // getLangISO3 returns the langID for the given 3-letter ISO language code 90 // or unknownLang if this does not exist. 91 func getLangISO3(s []byte) (langID, error) { 92 if tag.FixCase("und", s) { 93 // first try to match canonical 3-letter entries 94 for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) { 95 if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] { 96 // We treat "und" as special and always translate it to "unspecified". 97 // Note that ZZ and Zzzz are private use and are not treated as 98 // unspecified by default. 99 id := langID(i) 100 if id == nonCanonicalUnd { 101 return 0, nil 102 } 103 return id, nil 104 } 105 } 106 if i := altLangISO3.Index(s); i != -1 { 107 return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil 108 } 109 n := strToInt(s) 110 if langNoIndex[n/8]&(1<<(n%8)) != 0 { 111 return langID(n) + langNoIndexOffset, nil 112 } 113 // Check for non-canonical uses of ISO3. 114 for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) { 115 if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] { 116 return langID(i), nil 117 } 118 } 119 return 0, mkErrInvalid(s) 120 } 121 return 0, errSyntax 122 } 123 124 // stringToBuf writes the string to b and returns the number of bytes 125 // written. cap(b) must be >= 3. 126 func (id langID) stringToBuf(b []byte) int { 127 if id >= langNoIndexOffset { 128 intToStr(uint(id)-langNoIndexOffset, b[:3]) 129 return 3 130 } else if id == 0 { 131 return copy(b, "und") 132 } 133 l := lang[id<<2:] 134 if l[3] == 0 { 135 return copy(b, l[:3]) 136 } 137 return copy(b, l[:2]) 138 } 139 140 // String returns the BCP 47 representation of the langID. 141 // Use b as variable name, instead of id, to ensure the variable 142 // used is consistent with that of Base in which this type is embedded. 143 func (b langID) String() string { 144 if b == 0 { 145 return "und" 146 } else if b >= langNoIndexOffset { 147 b -= langNoIndexOffset 148 buf := [3]byte{} 149 intToStr(uint(b), buf[:]) 150 return string(buf[:]) 151 } 152 l := lang.Elem(int(b)) 153 if l[3] == 0 { 154 return l[:3] 155 } 156 return l[:2] 157 } 158 159 // ISO3 returns the ISO 639-3 language code. 160 func (b langID) ISO3() string { 161 if b == 0 || b >= langNoIndexOffset { 162 return b.String() 163 } 164 l := lang.Elem(int(b)) 165 if l[3] == 0 { 166 return l[:3] 167 } else if l[2] == 0 { 168 return altLangISO3.Elem(int(l[3]))[:3] 169 } 170 // This allocation will only happen for 3-letter ISO codes 171 // that are non-canonical BCP 47 language identifiers. 172 return l[0:1] + l[2:4] 173 } 174 175 // IsPrivateUse reports whether this language code is reserved for private use. 176 func (b langID) IsPrivateUse() bool { 177 return langPrivateStart <= b && b <= langPrivateEnd 178 } 179 180 type regionID uint16 181 182 // getRegionID returns the region id for s if s is a valid 2-letter region code 183 // or unknownRegion. 184 func getRegionID(s []byte) (regionID, error) { 185 if len(s) == 3 { 186 if isAlpha(s[0]) { 187 return getRegionISO3(s) 188 } 189 if i, err := strconv.ParseUint(string(s), 10, 10); err == nil { 190 return getRegionM49(int(i)) 191 } 192 } 193 return getRegionISO2(s) 194 } 195 196 // getRegionISO2 returns the regionID for the given 2-letter ISO country code 197 // or unknownRegion if this does not exist. 198 func getRegionISO2(s []byte) (regionID, error) { 199 i, err := findIndex(regionISO, s, "ZZ") 200 if err != nil { 201 return 0, err 202 } 203 return regionID(i) + isoRegionOffset, nil 204 } 205 206 // getRegionISO3 returns the regionID for the given 3-letter ISO country code 207 // or unknownRegion if this does not exist. 208 func getRegionISO3(s []byte) (regionID, error) { 209 if tag.FixCase("ZZZ", s) { 210 for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) { 211 if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] { 212 return regionID(i) + isoRegionOffset, nil 213 } 214 } 215 for i := 0; i < len(altRegionISO3); i += 3 { 216 if tag.Compare(altRegionISO3[i:i+3], s) == 0 { 217 return regionID(altRegionIDs[i/3]), nil 218 } 219 } 220 return 0, mkErrInvalid(s) 221 } 222 return 0, errSyntax 223 } 224 225 func getRegionM49(n int) (regionID, error) { 226 if 0 < n && n <= 999 { 227 const ( 228 searchBits = 7 229 regionBits = 9 230 regionMask = 1<<regionBits - 1 231 ) 232 idx := n >> searchBits 233 buf := fromM49[m49Index[idx]:m49Index[idx+1]] 234 val := uint16(n) << regionBits // we rely on bits shifting out 235 i := sort.Search(len(buf), func(i int) bool { 236 return buf[i] >= val 237 }) 238 if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val { 239 return regionID(r & regionMask), nil 240 } 241 } 242 var e ValueError 243 fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n) 244 return 0, e 245 } 246 247 // normRegion returns a region if r is deprecated or 0 otherwise. 248 // TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ). 249 // TODO: consider mapping split up regions to new most populous one (like CLDR). 250 func normRegion(r regionID) regionID { 251 m := regionOldMap 252 k := sort.Search(len(m), func(i int) bool { 253 return m[i].from >= uint16(r) 254 }) 255 if k < len(m) && m[k].from == uint16(r) { 256 return regionID(m[k].to) 257 } 258 return 0 259 } 260 261 const ( 262 iso3166UserAssigned = 1 << iota 263 ccTLD 264 bcp47Region 265 ) 266 267 func (r regionID) typ() byte { 268 return regionTypes[r] 269 } 270 271 // String returns the BCP 47 representation for the region. 272 // It returns "ZZ" for an unspecified region. 273 func (r regionID) String() string { 274 if r < isoRegionOffset { 275 if r == 0 { 276 return "ZZ" 277 } 278 return fmt.Sprintf("%03d", r.M49()) 279 } 280 r -= isoRegionOffset 281 return regionISO.Elem(int(r))[:2] 282 } 283 284 // ISO3 returns the 3-letter ISO code of r. 285 // Note that not all regions have a 3-letter ISO code. 286 // In such cases this method returns "ZZZ". 287 func (r regionID) ISO3() string { 288 if r < isoRegionOffset { 289 return "ZZZ" 290 } 291 r -= isoRegionOffset 292 reg := regionISO.Elem(int(r)) 293 switch reg[2] { 294 case 0: 295 return altRegionISO3[reg[3]:][:3] 296 case ' ': 297 return "ZZZ" 298 } 299 return reg[0:1] + reg[2:4] 300 } 301 302 // M49 returns the UN M.49 encoding of r, or 0 if this encoding 303 // is not defined for r. 304 func (r regionID) M49() int { 305 return int(m49[r]) 306 } 307 308 // IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This 309 // may include private-use tags that are assigned by CLDR and used in this 310 // implementation. So IsPrivateUse and IsCountry can be simultaneously true. 311 func (r regionID) IsPrivateUse() bool { 312 return r.typ()&iso3166UserAssigned != 0 313 } 314 315 type scriptID uint8 316 317 // getScriptID returns the script id for string s. It assumes that s 318 // is of the format [A-Z][a-z]{3}. 319 func getScriptID(idx tag.Index, s []byte) (scriptID, error) { 320 i, err := findIndex(idx, s, "Zzzz") 321 return scriptID(i), err 322 } 323 324 // String returns the script code in title case. 325 // It returns "Zzzz" for an unspecified script. 326 func (s scriptID) String() string { 327 if s == 0 { 328 return "Zzzz" 329 } 330 return script.Elem(int(s)) 331 } 332 333 // IsPrivateUse reports whether this script code is reserved for private use. 334 func (s scriptID) IsPrivateUse() bool { 335 return _Qaaa <= s && s <= _Qabx 336 } 337 338 const ( 339 maxAltTaglen = len("en-US-POSIX") 340 maxLen = maxAltTaglen 341 ) 342 343 var ( 344 // grandfatheredMap holds a mapping from legacy and grandfathered tags to 345 // their base language or index to more elaborate tag. 346 grandfatheredMap = map[[maxLen]byte]int16{ 347 [maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban 348 [maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami 349 [maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn 350 [maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak 351 [maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon 352 [maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux 353 [maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo 354 [maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn 355 [maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao 356 [maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay 357 [maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu 358 [maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok 359 [maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn 360 [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR 361 [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL 362 [maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE 363 [maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu 364 [maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka 365 [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan 366 [maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang 367 368 // Grandfathered tags with no modern replacement will be converted as 369 // follows: 370 [maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish 371 [maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed 372 [maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default 373 [maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian 374 [maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo 375 [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min 376 377 // CLDR-specific tag. 378 [maxLen]byte{'r', 'o', 'o', 't'}: 0, // root 379 [maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX" 380 } 381 382 altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102} 383 384 altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix" 385 ) 386 387 func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) { 388 if v, ok := grandfatheredMap[s]; ok { 389 if v < 0 { 390 return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true 391 } 392 t.lang = langID(v) 393 return t, true 394 } 395 return t, false 396 }