github.com/liquid-dev/text@v0.3.3-liquid/internal/language/match.go (about)

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package language
     6  
     7  import "errors"
     8  
     9  type scriptRegionFlags uint8
    10  
    11  const (
    12  	isList = 1 << iota
    13  	scriptInFrom
    14  	regionInFrom
    15  )
    16  
    17  func (t *Tag) setUndefinedLang(id Language) {
    18  	if t.LangID == 0 {
    19  		t.LangID = id
    20  	}
    21  }
    22  
    23  func (t *Tag) setUndefinedScript(id Script) {
    24  	if t.ScriptID == 0 {
    25  		t.ScriptID = id
    26  	}
    27  }
    28  
    29  func (t *Tag) setUndefinedRegion(id Region) {
    30  	if t.RegionID == 0 || t.RegionID.Contains(id) {
    31  		t.RegionID = id
    32  	}
    33  }
    34  
    35  // ErrMissingLikelyTagsData indicates no information was available
    36  // to compute likely values of missing tags.
    37  var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
    38  
    39  // addLikelySubtags sets subtags to their most likely value, given the locale.
    40  // In most cases this means setting fields for unknown values, but in some
    41  // cases it may alter a value.  It returns an ErrMissingLikelyTagsData error
    42  // if the given locale cannot be expanded.
    43  func (t Tag) addLikelySubtags() (Tag, error) {
    44  	id, err := addTags(t)
    45  	if err != nil {
    46  		return t, err
    47  	} else if id.equalTags(t) {
    48  		return t, nil
    49  	}
    50  	id.RemakeString()
    51  	return id, nil
    52  }
    53  
    54  // specializeRegion attempts to specialize a group region.
    55  func specializeRegion(t *Tag) bool {
    56  	if i := regionInclusion[t.RegionID]; i < nRegionGroups {
    57  		x := likelyRegionGroup[i]
    58  		if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
    59  			t.RegionID = Region(x.region)
    60  		}
    61  		return true
    62  	}
    63  	return false
    64  }
    65  
    66  // Maximize returns a new tag with missing tags filled in.
    67  func (t Tag) Maximize() (Tag, error) {
    68  	return addTags(t)
    69  }
    70  
    71  func addTags(t Tag) (Tag, error) {
    72  	// We leave private use identifiers alone.
    73  	if t.IsPrivateUse() {
    74  		return t, nil
    75  	}
    76  	if t.ScriptID != 0 && t.RegionID != 0 {
    77  		if t.LangID != 0 {
    78  			// already fully specified
    79  			specializeRegion(&t)
    80  			return t, nil
    81  		}
    82  		// Search matches for und-script-region. Note that for these cases
    83  		// region will never be a group so there is no need to check for this.
    84  		list := likelyRegion[t.RegionID : t.RegionID+1]
    85  		if x := list[0]; x.flags&isList != 0 {
    86  			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
    87  		}
    88  		for _, x := range list {
    89  			// Deviating from the spec. See match_test.go for details.
    90  			if Script(x.script) == t.ScriptID {
    91  				t.setUndefinedLang(Language(x.lang))
    92  				return t, nil
    93  			}
    94  		}
    95  	}
    96  	if t.LangID != 0 {
    97  		// Search matches for lang-script and lang-region, where lang != und.
    98  		if t.LangID < langNoIndexOffset {
    99  			x := likelyLang[t.LangID]
   100  			if x.flags&isList != 0 {
   101  				list := likelyLangList[x.region : x.region+uint16(x.script)]
   102  				if t.ScriptID != 0 {
   103  					for _, x := range list {
   104  						if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
   105  							t.setUndefinedRegion(Region(x.region))
   106  							return t, nil
   107  						}
   108  					}
   109  				} else if t.RegionID != 0 {
   110  					count := 0
   111  					goodScript := true
   112  					tt := t
   113  					for _, x := range list {
   114  						// We visit all entries for which the script was not
   115  						// defined, including the ones where the region was not
   116  						// defined. This allows for proper disambiguation within
   117  						// regions.
   118  						if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
   119  							tt.RegionID = Region(x.region)
   120  							tt.setUndefinedScript(Script(x.script))
   121  							goodScript = goodScript && tt.ScriptID == Script(x.script)
   122  							count++
   123  						}
   124  					}
   125  					if count == 1 {
   126  						return tt, nil
   127  					}
   128  					// Even if we fail to find a unique Region, we might have
   129  					// an unambiguous script.
   130  					if goodScript {
   131  						t.ScriptID = tt.ScriptID
   132  					}
   133  				}
   134  			}
   135  		}
   136  	} else {
   137  		// Search matches for und-script.
   138  		if t.ScriptID != 0 {
   139  			x := likelyScript[t.ScriptID]
   140  			if x.region != 0 {
   141  				t.setUndefinedRegion(Region(x.region))
   142  				t.setUndefinedLang(Language(x.lang))
   143  				return t, nil
   144  			}
   145  		}
   146  		// Search matches for und-region. If und-script-region exists, it would
   147  		// have been found earlier.
   148  		if t.RegionID != 0 {
   149  			if i := regionInclusion[t.RegionID]; i < nRegionGroups {
   150  				x := likelyRegionGroup[i]
   151  				if x.region != 0 {
   152  					t.setUndefinedLang(Language(x.lang))
   153  					t.setUndefinedScript(Script(x.script))
   154  					t.RegionID = Region(x.region)
   155  				}
   156  			} else {
   157  				x := likelyRegion[t.RegionID]
   158  				if x.flags&isList != 0 {
   159  					x = likelyRegionList[x.lang]
   160  				}
   161  				if x.script != 0 && x.flags != scriptInFrom {
   162  					t.setUndefinedLang(Language(x.lang))
   163  					t.setUndefinedScript(Script(x.script))
   164  					return t, nil
   165  				}
   166  			}
   167  		}
   168  	}
   169  
   170  	// Search matches for lang.
   171  	if t.LangID < langNoIndexOffset {
   172  		x := likelyLang[t.LangID]
   173  		if x.flags&isList != 0 {
   174  			x = likelyLangList[x.region]
   175  		}
   176  		if x.region != 0 {
   177  			t.setUndefinedScript(Script(x.script))
   178  			t.setUndefinedRegion(Region(x.region))
   179  		}
   180  		specializeRegion(&t)
   181  		if t.LangID == 0 {
   182  			t.LangID = _en // default language
   183  		}
   184  		return t, nil
   185  	}
   186  	return t, ErrMissingLikelyTagsData
   187  }
   188  
   189  func (t *Tag) setTagsFrom(id Tag) {
   190  	t.LangID = id.LangID
   191  	t.ScriptID = id.ScriptID
   192  	t.RegionID = id.RegionID
   193  }
   194  
   195  // minimize removes the region or script subtags from t such that
   196  // t.addLikelySubtags() == t.minimize().addLikelySubtags().
   197  func (t Tag) minimize() (Tag, error) {
   198  	t, err := minimizeTags(t)
   199  	if err != nil {
   200  		return t, err
   201  	}
   202  	t.RemakeString()
   203  	return t, nil
   204  }
   205  
   206  // minimizeTags mimics the behavior of the ICU 51 C implementation.
   207  func minimizeTags(t Tag) (Tag, error) {
   208  	if t.equalTags(Und) {
   209  		return t, nil
   210  	}
   211  	max, err := addTags(t)
   212  	if err != nil {
   213  		return t, err
   214  	}
   215  	for _, id := range [...]Tag{
   216  		{LangID: t.LangID},
   217  		{LangID: t.LangID, RegionID: t.RegionID},
   218  		{LangID: t.LangID, ScriptID: t.ScriptID},
   219  	} {
   220  		if x, err := addTags(id); err == nil && max.equalTags(x) {
   221  			t.setTagsFrom(id)
   222  			break
   223  		}
   224  	}
   225  	return t, nil
   226  }