golang.org/x/text@v0.14.0/language/display/lookup.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package display 6 7 // This file contains common lookup code that is shared between the various 8 // implementations of Namer and Dictionaries. 9 10 import ( 11 "fmt" 12 "sort" 13 "strings" 14 15 "golang.org/x/text/language" 16 ) 17 18 type namer interface { 19 // name gets the string for the given index. It should walk the 20 // inheritance chain if a value is not present in the base index. 21 name(idx int) string 22 } 23 24 func nameLanguage(n namer, x interface{}) string { 25 t, _ := language.All.Compose(x) 26 for { 27 i, _, _ := langTagSet.index(t.Raw()) 28 if s := n.name(i); s != "" { 29 return s 30 } 31 if t = t.Parent(); t == language.Und { 32 return "" 33 } 34 } 35 } 36 37 func nameScript(n namer, x interface{}) string { 38 t, _ := language.DeprecatedScript.Compose(x) 39 _, s, _ := t.Raw() 40 return n.name(scriptIndex.index(s.String())) 41 } 42 43 func nameRegion(n namer, x interface{}) string { 44 t, _ := language.DeprecatedRegion.Compose(x) 45 _, _, r := t.Raw() 46 return n.name(regionIndex.index(r.String())) 47 } 48 49 func nameTag(langN, scrN, regN namer, x interface{}) string { 50 t, ok := x.(language.Tag) 51 if !ok { 52 return "" 53 } 54 const form = language.All &^ language.SuppressScript 55 if c, err := form.Canonicalize(t); err == nil { 56 t = c 57 } 58 _, sRaw, rRaw := t.Raw() 59 i, scr, reg := langTagSet.index(t.Raw()) 60 for i != -1 { 61 if str := langN.name(i); str != "" { 62 if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR { 63 ss, sr := "", "" 64 if hasS { 65 ss = scrN.name(scriptIndex.index(scr.String())) 66 } 67 if hasR { 68 sr = regN.name(regionIndex.index(reg.String())) 69 } 70 // TODO: use patterns in CLDR or at least confirm they are the 71 // same for all languages. 72 if ss != "" && sr != "" { 73 return fmt.Sprintf("%s (%s, %s)", str, ss, sr) 74 } 75 if ss != "" || sr != "" { 76 return fmt.Sprintf("%s (%s%s)", str, ss, sr) 77 } 78 } 79 return str 80 } 81 scr, reg = sRaw, rRaw 82 if t = t.Parent(); t == language.Und { 83 return "" 84 } 85 i, _, _ = langTagSet.index(t.Raw()) 86 } 87 return "" 88 } 89 90 // header contains the data and indexes for a single namer. 91 // data contains a series of strings concatenated into one. index contains the 92 // offsets for a string in data. For example, consider a header that defines 93 // strings for the languages de, el, en, fi, and nl: 94 // 95 // header{ 96 // data: "GermanGreekEnglishDutch", 97 // index: []uint16{0, 6, 11, 18, 18, 23}, 98 // } 99 // 100 // For a language with index i, the string is defined by 101 // data[index[i]:index[i+1]]. So the number of elements in index is always one 102 // greater than the number of languages for which header defines a value. 103 // A string for a language may be empty, which means the name is undefined. In 104 // the above example, the name for fi (Finnish) is undefined. 105 type header struct { 106 data string 107 index []uint16 108 } 109 110 // name looks up the name for a tag in the dictionary, given its index. 111 func (h *header) name(i int) string { 112 if 0 <= i && i < len(h.index)-1 { 113 return h.data[h.index[i]:h.index[i+1]] 114 } 115 return "" 116 } 117 118 // tagSet is used to find the index of a language in a set of tags. 119 type tagSet struct { 120 single tagIndex 121 long []string 122 } 123 124 var ( 125 langTagSet = tagSet{ 126 single: langIndex, 127 long: langTagsLong, 128 } 129 130 // selfTagSet is used for indexing the language strings in their own 131 // language. 132 selfTagSet = tagSet{ 133 single: selfIndex, 134 long: selfTagsLong, 135 } 136 137 zzzz = language.MustParseScript("Zzzz") 138 zz = language.MustParseRegion("ZZ") 139 ) 140 141 // index returns the index of the tag for the given base, script and region or 142 // its parent if the tag is not available. If the match is for a parent entry, 143 // the excess script and region are returned. 144 func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) { 145 lang := base.String() 146 index := -1 147 if (scr != language.Script{} || reg != language.Region{}) { 148 if scr == zzzz { 149 scr = language.Script{} 150 } 151 if reg == zz { 152 reg = language.Region{} 153 } 154 155 i := sort.SearchStrings(ts.long, lang) 156 // All entries have either a script or a region and not both. 157 scrStr, regStr := scr.String(), reg.String() 158 for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ { 159 if s := ts.long[i][len(lang)+1:]; s == scrStr { 160 scr = language.Script{} 161 index = i + ts.single.len() 162 break 163 } else if s == regStr { 164 reg = language.Region{} 165 index = i + ts.single.len() 166 break 167 } 168 } 169 } 170 if index == -1 { 171 index = ts.single.index(lang) 172 } 173 return index, scr, reg 174 } 175 176 func (ts *tagSet) Tags() []language.Tag { 177 tags := make([]language.Tag, 0, ts.single.len()+len(ts.long)) 178 ts.single.keys(func(s string) { 179 tags = append(tags, language.Raw.MustParse(s)) 180 }) 181 for _, s := range ts.long { 182 tags = append(tags, language.Raw.MustParse(s)) 183 } 184 return tags 185 } 186 187 func supportedScripts() []language.Script { 188 scr := make([]language.Script, 0, scriptIndex.len()) 189 scriptIndex.keys(func(s string) { 190 scr = append(scr, language.MustParseScript(s)) 191 }) 192 return scr 193 } 194 195 func supportedRegions() []language.Region { 196 reg := make([]language.Region, 0, regionIndex.len()) 197 regionIndex.keys(func(s string) { 198 reg = append(reg, language.MustParseRegion(s)) 199 }) 200 return reg 201 } 202 203 // tagIndex holds a concatenated lists of subtags of length 2 to 4, one string 204 // for each length, which can be used in combination with binary search to get 205 // the index associated with a tag. 206 // For example, a tagIndex{ 207 // 208 // "arenesfrruzh", // 6 2-byte tags. 209 // "barwae", // 2 3-byte tags. 210 // "", 211 // 212 // } 213 // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag 214 // "wae" had an index of 7. 215 type tagIndex [3]string 216 217 func (t *tagIndex) index(s string) int { 218 sz := len(s) 219 if sz < 2 || 4 < sz { 220 return -1 221 } 222 a := t[sz-2] 223 index := sort.Search(len(a)/sz, func(i int) bool { 224 p := i * sz 225 return a[p:p+sz] >= s 226 }) 227 p := index * sz 228 if end := p + sz; end > len(a) || a[p:end] != s { 229 return -1 230 } 231 // Add the number of tags for smaller sizes. 232 for i := 0; i < sz-2; i++ { 233 index += len(t[i]) / (i + 2) 234 } 235 return index 236 } 237 238 // len returns the number of tags that are contained in the tagIndex. 239 func (t *tagIndex) len() (n int) { 240 for i, s := range t { 241 n += len(s) / (i + 2) 242 } 243 return n 244 } 245 246 // keys calls f for each tag. 247 func (t *tagIndex) keys(f func(key string)) { 248 for i, s := range *t { 249 for ; s != ""; s = s[i+2:] { 250 f(s[:i+2]) 251 } 252 } 253 }