github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/display/lookup.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package display
     6  
     7  // This file contains common lookup code that is shared between the various
     8  // implementations of Namer and Dictionaries.
     9  
    10  import (
    11  	"fmt"
    12  	"sort"
    13  	"strings"
    14  
    15  	"golang.org/x/text/language"
    16  )
    17  
// namer is the lookup abstraction consumed by the name* helpers in this file:
// it maps an integer index, as produced by a tagSet or tagIndex, to a string.
type namer interface {
	// name gets the string for the given index. It should walk the
	// inheritance chain if a value is not present in the base index.
	name(idx int) string
}
    23  
    24  func nameLanguage(n namer, x interface{}) string {
    25  	t, _ := language.All.Compose(x)
    26  	i, _, _ := langTagSet.index(t.Raw())
    27  	return n.name(i)
    28  }
    29  
    30  func nameScript(n namer, x interface{}) string {
    31  	t, _ := language.DeprecatedScript.Compose(x)
    32  	_, s, _ := t.Raw()
    33  	return n.name(scriptIndex.index(s.String()))
    34  }
    35  
    36  func nameRegion(n namer, x interface{}) string {
    37  	t, _ := language.DeprecatedRegion.Compose(x)
    38  	_, _, r := t.Raw()
    39  	return n.name(regionIndex.index(r.String()))
    40  }
    41  
    42  func nameTag(langN, scrN, regN namer, x interface{}) string {
    43  	t, ok := x.(language.Tag)
    44  	if !ok {
    45  		return ""
    46  	}
    47  	const form = language.All &^ language.SuppressScript
    48  	if c, err := form.Canonicalize(t); err == nil {
    49  		t = c
    50  	}
    51  	i, scr, reg := langTagSet.index(t.Raw())
    52  	if i == -1 {
    53  		return ""
    54  	}
    55  
    56  	str := langN.name(i)
    57  	if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
    58  		ss, sr := "", ""
    59  		if hasS {
    60  			ss = scrN.name(scriptIndex.index(scr.String()))
    61  		}
    62  		if hasR {
    63  			sr = regN.name(regionIndex.index(reg.String()))
    64  		}
    65  		// TODO: use patterns in CLDR or at least confirm they are the same for
    66  		// all languages.
    67  		if ss != "" && sr != "" {
    68  			return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
    69  		}
    70  		if ss != "" || sr != "" {
    71  			return fmt.Sprintf("%s (%s%s)", str, ss, sr)
    72  		}
    73  	}
    74  	return str
    75  }
    76  
    77  // header contains the data and indexes for a single namer.
    78  // data contains a series of strings concatenated into one. index contains the
    79  // offsets for a string in data. For example, consider a header that defines
    80  // strings for the languages de, el, en, fi, and nl:
    81  //
    82  // 		header{
    83  // 			data: "GermanGreekEnglishDutch",
    84  //  		index: []uint16{ 0, 6, 11, 18, 18, 23 },
    85  // 		}
    86  //
    87  // For a language with index i, the string is defined by
    88  // data[index[i]:index[i+1]]. So the number of elements in index is always one
    89  // greater than the number of languages for which header defines a value.
    90  // A string for a language may be empty, which means the name is undefined. In
    91  // the above example, the name for fi (Finnish) is undefined.
    92  type header struct {
    93  	data  string
    94  	index []uint16
    95  }
    96  
    97  // name looks up the name for a tag in the dictionary, given its index.
    98  func (h *header) name(i int) string {
    99  	if i < len(h.index)-1 {
   100  		return h.data[h.index[i]:h.index[i+1]]
   101  	}
   102  	return ""
   103  }
   104  
// tagSet is used to find the index of a language in a set of tags.
type tagSet struct {
	// single indexes bare language subtags. Its entries occupy the index
	// range [0, single.len()).
	single tagIndex
	// long lists tags that carry a script or a region in addition to the
	// language. Entry j has index single.len()+j. The index method locates
	// entries with sort.SearchStrings, so the slice is expected to be sorted.
	long   []string
}
   110  
var (
	// langTagSet is the tag set used by nameLanguage and nameTag to turn a
	// language tag into a name index.
	langTagSet = tagSet{
		single: langIndex,
		long:   langTagsLong,
	}

	// selfTagSet is used for indexing the language strings in their own
	// language.
	selfTagSet = tagSet{
		single: selfIndex,
		long:   selfTagsLong,
	}

	// zzzz and zz are the script and region values that tagSet.index replaces
	// with the zero Script and Region before searching.
	zzzz = language.MustParseScript("Zzzz")
	zz   = language.MustParseRegion("ZZ")
)
   127  
   128  // index returns the index of the tag for the given base, script and region or
   129  // its parent if the tag is not available. If the match is for a parent entry,
   130  // the excess script and region are returned.
   131  func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
   132  	lang := base.String()
   133  	index := -1
   134  	if (scr != language.Script{} || reg != language.Region{}) {
   135  		if scr == zzzz {
   136  			scr = language.Script{}
   137  		}
   138  		if reg == zz {
   139  			reg = language.Region{}
   140  		}
   141  
   142  		i := sort.SearchStrings(ts.long, lang)
   143  		// All entries have either a script or a region and not both.
   144  		scrStr, regStr := scr.String(), reg.String()
   145  		for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
   146  			if s := ts.long[i][len(lang)+1:]; s == scrStr {
   147  				scr = language.Script{}
   148  				index = i + ts.single.len()
   149  				break
   150  			} else if s == regStr {
   151  				reg = language.Region{}
   152  				index = i + ts.single.len()
   153  				break
   154  			}
   155  		}
   156  	}
   157  	if index == -1 {
   158  		index = ts.single.index(lang)
   159  	}
   160  	return index, scr, reg
   161  }
   162  
   163  func (ts *tagSet) Tags() []language.Tag {
   164  	tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
   165  	ts.single.keys(func(s string) {
   166  		tags = append(tags, language.Raw.MustParse(s))
   167  	})
   168  	for _, s := range ts.long {
   169  		tags = append(tags, language.Raw.MustParse(s))
   170  	}
   171  	return tags
   172  }
   173  
   174  func supportedScripts() []language.Script {
   175  	scr := make([]language.Script, 0, scriptIndex.len())
   176  	scriptIndex.keys(func(s string) {
   177  		scr = append(scr, language.MustParseScript(s))
   178  	})
   179  	return scr
   180  }
   181  
   182  func supportedRegions() []language.Region {
   183  	reg := make([]language.Region, 0, regionIndex.len())
   184  	regionIndex.keys(func(s string) {
   185  		reg = append(reg, language.MustParseRegion(s))
   186  	})
   187  	return reg
   188  }
   189  
   190  // tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
   191  // for each length, which can be used in combination with binary search to get
   192  // the index associated with a tag.
   193  // For example, a tagIndex{
   194  //   "arenesfrruzh",  // 6 2-byte tags.
   195  //   "barwae",        // 2 3-byte tags.
   196  //   "",
   197  // }
   198  // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
   199  // "wae" had an index of 7.
   200  type tagIndex [3]string
   201  
   202  func (t *tagIndex) index(s string) int {
   203  	sz := len(s)
   204  	if sz < 2 || 4 < sz {
   205  		return -1
   206  	}
   207  	a := t[sz-2]
   208  	index := sort.Search(len(a)/sz, func(i int) bool {
   209  		p := i * sz
   210  		return a[p:p+sz] >= s
   211  	})
   212  	p := index * sz
   213  	if end := p + sz; end > len(a) || a[p:end] != s {
   214  		return -1
   215  	}
   216  	// Add the number of tags for smaller sizes.
   217  	for i := 0; i < sz-2; i++ {
   218  		index += len(t[i]) / (i + 2)
   219  	}
   220  	return index
   221  }
   222  
   223  // len returns the number of tags that are contained in the tagIndex.
   224  func (t *tagIndex) len() (n int) {
   225  	for i, s := range t {
   226  		n += len(s) / (i + 2)
   227  	}
   228  	return n
   229  }
   230  
   231  // keys calls f for each tag.
   232  func (t *tagIndex) keys(f func(key string)) {
   233  	for i, s := range *t {
   234  		for ; s != ""; s = s[i+2:] {
   235  			f(s[:i+2])
   236  		}
   237  	}
   238  }