github.com/go-enjin/golang-org-x-text@v0.12.1-enjin.2/internal/language/compact/language.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen.go gen_index.go -output tables.go
     6  //go:generate go run gen_parents.go
     7  
     8  package compact
     9  
    10  // TODO: Remove above NOTE after:
    11  // - verifying that tables are dropped correctly (most notably matcher tables).
    12  
    13  import (
    14  	"fmt"
    15  	"strconv"
    16  	"strings"
    17  
    18  	"github.com/go-enjin/golang-org-x-text/internal/language"
    19  )
    20  
    21  // Tag represents a BCP 47 language tag. It is used to specify an instance of a
    22  // specific language or locale. All language tag values are guaranteed to be
    23  // well-formed.
    24  type Tag struct {
    25  	// NOTE: exported tags will become part of the public API.
    26  	language ID
    27  	locale   ID
    28  	full     fullTag // always a language.Tag for now.
    29  }
    30  
    31  func (t *Tag) MarshalBinary() (data []byte, err error) {
    32  	text := fmt.Sprintf("language:%d;locale:%d", t.language, t.locale)
    33  	data = []byte(text)
    34  	return
    35  }
    36  
    37  func (t *Tag) UnmarshalBinary(data []byte) (err error) {
    38  	text := string(data)
    39  	segments := strings.Split(text, ";")
    40  	if len(segments) != 2 {
    41  		err = fmt.Errorf("invalid number of segements in gob data: \"%v\"", text)
    42  		return
    43  	}
    44  
    45  	for _, segment := range segments {
    46  		kv := strings.Split(segment, ":")
    47  		if len(kv) != 2 {
    48  			err = fmt.Errorf("invalid key-value pair in gob data segment: \"%v\"", segment)
    49  			return
    50  		}
    51  		switch kv[0] {
    52  		case "language":
    53  			if vi, ee := strconv.ParseUint(kv[1], 16, 16); ee != nil {
    54  				err = fmt.Errorf("invalid language value in gob data pair: %+v - %v", kv, ee)
    55  				return
    56  			} else {
    57  				t.language = ID(vi)
    58  			}
    59  		case "locale":
    60  			if vi, ee := strconv.ParseUint(kv[1], 16, 16); ee != nil {
    61  				err = fmt.Errorf("invalid locale value in gob data pair: %+v - %v", kv, ee)
    62  				return
    63  			} else {
    64  				t.locale = ID(vi)
    65  			}
    66  		default:
    67  			err = fmt.Errorf("invalid key name in gob data pair: %+v", kv)
    68  			return
    69  		}
    70  	}
    71  	return
    72  }
    73  
    74  const _und = 0
    75  
    76  type fullTag interface {
    77  	IsRoot() bool
    78  	Parent() language.Tag
    79  }
    80  
    81  // Make a compact Tag from a fully specified internal language Tag.
    82  func Make(t language.Tag) (tag Tag) {
    83  	if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
    84  		if r, err := language.ParseRegion(region[:2]); err == nil {
    85  			tFull := t
    86  			t, _ = t.SetTypeForKey("rg", "")
    87  			// TODO: should we not consider "va" for the language tag?
    88  			var exact1, exact2 bool
    89  			tag.language, exact1 = FromTag(t)
    90  			t.RegionID = r
    91  			tag.locale, exact2 = FromTag(t)
    92  			if !exact1 || !exact2 {
    93  				tag.full = tFull
    94  			}
    95  			return tag
    96  		}
    97  	}
    98  	lang, ok := FromTag(t)
    99  	tag.language = lang
   100  	tag.locale = lang
   101  	if !ok {
   102  		tag.full = t
   103  	}
   104  	return tag
   105  }
   106  
   107  // Tag returns an internal language Tag version of this tag.
   108  func (t Tag) Tag() language.Tag {
   109  	if t.full != nil {
   110  		return t.full.(language.Tag)
   111  	}
   112  	tag := t.language.Tag()
   113  	if t.language != t.locale {
   114  		loc := t.locale.Tag()
   115  		tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
   116  	}
   117  	return tag
   118  }
   119  
   120  // IsCompact reports whether this tag is fully defined in terms of ID.
   121  func (t *Tag) IsCompact() bool {
   122  	return t.full == nil
   123  }
   124  
   125  // MayHaveVariants reports whether a tag may have variants. If it returns false
   126  // it is guaranteed the tag does not have variants.
   127  func (t Tag) MayHaveVariants() bool {
   128  	return t.full != nil || int(t.language) >= len(coreTags)
   129  }
   130  
   131  // MayHaveExtensions reports whether a tag may have extensions. If it returns
   132  // false it is guaranteed the tag does not have them.
   133  func (t Tag) MayHaveExtensions() bool {
   134  	return t.full != nil ||
   135  		int(t.language) >= len(coreTags) ||
   136  		t.language != t.locale
   137  }
   138  
   139  // IsRoot returns true if t is equal to language "und".
   140  func (t Tag) IsRoot() bool {
   141  	if t.full != nil {
   142  		return t.full.IsRoot()
   143  	}
   144  	return t.language == _und
   145  }
   146  
   147  // Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
   148  // specific language are substituted with fields from the parent language.
   149  // The parent for a language may change for newer versions of CLDR.
   150  func (t Tag) Parent() Tag {
   151  	if t.full != nil {
   152  		return Make(t.full.Parent())
   153  	}
   154  	if t.language != t.locale {
   155  		// Simulate stripping -u-rg-xxxxxx
   156  		return Tag{language: t.language, locale: t.language}
   157  	}
   158  	// TODO: use parent lookup table once cycle from internal package is
   159  	// removed. Probably by internalizing the table and declaring this fast
   160  	// enough.
   161  	// lang := compactID(internal.Parent(uint16(t.language)))
   162  	lang, _ := FromTag(t.language.Tag().Parent())
   163  	return Tag{language: lang, locale: lang}
   164  }
   165  
   166  // nextToken returns token t and the rest of the string.
   167  func nextToken(s string) (t, tail string) {
   168  	p := strings.Index(s[1:], "-")
   169  	if p == -1 {
   170  		return s[1:], ""
   171  	}
   172  	p++
   173  	return s[1:p], s[p:]
   174  }
   175  
   176  // LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
   177  // for which data exists in the text repository.The index will change over time
   178  // and should not be stored in persistent storage. If t does not match a compact
   179  // index, exact will be false and the compact index will be returned for the
   180  // first match after repeatedly taking the Parent of t.
   181  func LanguageID(t Tag) (id ID, exact bool) {
   182  	return t.language, t.full == nil
   183  }
   184  
   185  // RegionalID returns the ID for the regional variant of this tag. This index is
   186  // used to indicate region-specific overrides, such as default currency, default
   187  // calendar and week data, default time cycle, and default measurement system
   188  // and unit preferences.
   189  //
   190  // For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
   191  // settings for currency, number formatting, etc. The CompactIndex for this tag
   192  // will be that for en-GB, while the RegionalID will be the one corresponding to
   193  // en-US.
   194  func RegionalID(t Tag) (id ID, exact bool) {
   195  	return t.locale, t.full == nil
   196  }
   197  
   198  // LanguageTag returns t stripped of regional variant indicators.
   199  //
   200  // At the moment this means it is stripped of a regional and variant subtag "rg"
   201  // and "va" in the "u" extension.
   202  func (t Tag) LanguageTag() Tag {
   203  	if t.full == nil {
   204  		return Tag{language: t.language, locale: t.language}
   205  	}
   206  	tt := t.Tag()
   207  	tt.SetTypeForKey("rg", "")
   208  	tt.SetTypeForKey("va", "")
   209  	return Make(tt)
   210  }
   211  
   212  // RegionalTag returns the regional variant of the tag.
   213  //
   214  // At the moment this means that the region is set from the regional subtag
   215  // "rg" in the "u" extension.
   216  func (t Tag) RegionalTag() Tag {
   217  	rt := Tag{language: t.locale, locale: t.locale}
   218  	if t.full == nil {
   219  		return rt
   220  	}
   221  	b := language.Builder{}
   222  	tag := t.Tag()
   223  	// tag, _ = tag.SetTypeForKey("rg", "")
   224  	b.SetTag(t.locale.Tag())
   225  	if v := tag.Variants(); v != "" {
   226  		for _, v := range strings.Split(v, "-") {
   227  			b.AddVariant(v)
   228  		}
   229  	}
   230  	for _, e := range tag.Extensions() {
   231  		b.AddExt(e)
   232  	}
   233  	return t
   234  }
   235  
   236  // FromTag reports closest matching ID for an internal language Tag.
   237  func FromTag(t language.Tag) (id ID, exact bool) {
   238  	// TODO: perhaps give more frequent tags a lower index.
   239  	// TODO: we could make the indexes stable. This will excluded some
   240  	//       possibilities for optimization, so don't do this quite yet.
   241  	exact = true
   242  
   243  	b, s, r := t.Raw()
   244  	if t.HasString() {
   245  		if t.IsPrivateUse() {
   246  			// We have no entries for user-defined tags.
   247  			return 0, false
   248  		}
   249  		hasExtra := false
   250  		if t.HasVariants() {
   251  			if t.HasExtensions() {
   252  				build := language.Builder{}
   253  				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
   254  				build.AddVariant(t.Variants())
   255  				exact = false
   256  				t = build.Make()
   257  			}
   258  			hasExtra = true
   259  		} else if _, ok := t.Extension('u'); ok {
   260  			// TODO: va may mean something else. Consider not considering it.
   261  			// Strip all but the 'va' entry.
   262  			old := t
   263  			variant := t.TypeForKey("va")
   264  			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
   265  			if variant != "" {
   266  				t, _ = t.SetTypeForKey("va", variant)
   267  				hasExtra = true
   268  			}
   269  			exact = old == t
   270  		} else {
   271  			exact = false
   272  		}
   273  		if hasExtra {
   274  			// We have some variants.
   275  			for i, s := range specialTags {
   276  				if s == t {
   277  					return ID(i + len(coreTags)), exact
   278  				}
   279  			}
   280  			exact = false
   281  		}
   282  	}
   283  	if x, ok := getCoreIndex(t); ok {
   284  		return x, exact
   285  	}
   286  	exact = false
   287  	if r != 0 && s == 0 {
   288  		// Deal with cases where an extra script is inserted for the region.
   289  		t, _ := t.Maximize()
   290  		if x, ok := getCoreIndex(t); ok {
   291  			return x, exact
   292  		}
   293  	}
   294  	for t = t.Parent(); t != root; t = t.Parent() {
   295  		// No variants specified: just compare core components.
   296  		// The key has the form lllssrrr, where l, s, and r are nibbles for
   297  		// respectively the langID, scriptID, and regionID.
   298  		if x, ok := getCoreIndex(t); ok {
   299  			return x, exact
   300  		}
   301  	}
   302  	return 0, exact
   303  }
   304  
   305  var root = language.Tag{}