github.com/liquid-dev/text@v0.3.3-liquid/feature/plural/plural.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen.go gen_common.go
     6  
     7  // Package plural provides utilities for handling linguistic plurals in text.
     8  //
     9  // The definitions in this package are based on the plural rule handling defined
    10  // in CLDR. See
    11  // https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules for
    12  // details.
    13  package plural
    14  
    15  import (
    16  	"github.com/liquid-dev/text/internal/language/compact"
    17  	"github.com/liquid-dev/text/internal/number"
    18  	"github.com/liquid-dev/text/language"
    19  )
    20  
    21  // Rules defines the plural rules for all languages for a certain plural type.
    22  //
    23  //
    24  // This package is UNDER CONSTRUCTION and its API may change.
    25  type Rules struct {
    26  	rules          []pluralCheck
    27  	index          []byte
    28  	langToIndex    []byte
    29  	inclusionMasks []uint64
    30  }
    31  
    32  var (
    33  	// Cardinal defines the plural rules for numbers indicating quantities.
    34  	Cardinal *Rules = cardinal
    35  
    36  	// Ordinal defines the plural rules for numbers indicating position
    37  	// (first, second, etc.).
    38  	Ordinal *Rules = ordinal
    39  
    40  	ordinal = &Rules{
    41  		ordinalRules,
    42  		ordinalIndex,
    43  		ordinalLangToIndex,
    44  		ordinalInclusionMasks[:],
    45  	}
    46  
    47  	cardinal = &Rules{
    48  		cardinalRules,
    49  		cardinalIndex,
    50  		cardinalLangToIndex,
    51  		cardinalInclusionMasks[:],
    52  	}
    53  )
    54  
    55  // getIntApprox converts the digits in slice digits[start:end] to an integer
    56  // according to the following rules:
    57  //	- Let i be asInt(digits[start:end]), where out-of-range digits are assumed
    58  //	  to be zero.
    59  //	- Result n is big if i / 10^nMod > 1.
    60  //	- Otherwise the result is i % 10^nMod.
    61  //
    62  // For example, if digits is {1, 2, 3} and start:end is 0:5, then the result
    63  // for various values of nMod is:
    64  //	- when nMod == 2, n == big
    65  //	- when nMod == 3, n == big
    66  //	- when nMod == 4, n == big
    67  //	- when nMod == 5, n == 12300
    68  //	- when nMod == 6, n == 12300
    69  //	- when nMod == 7, n == 12300
    70  func getIntApprox(digits []byte, start, end, nMod, big int) (n int) {
    71  	// Leading 0 digits just result in 0.
    72  	p := start
    73  	if p < 0 {
    74  		p = 0
    75  	}
    76  	// Range only over the part for which we have digits.
    77  	mid := end
    78  	if mid >= len(digits) {
    79  		mid = len(digits)
    80  	}
    81  	// Check digits more significant that nMod.
    82  	if q := end - nMod; q > 0 {
    83  		if q > mid {
    84  			q = mid
    85  		}
    86  		for ; p < q; p++ {
    87  			if digits[p] != 0 {
    88  				return big
    89  			}
    90  		}
    91  	}
    92  	for ; p < mid; p++ {
    93  		n = 10*n + int(digits[p])
    94  	}
    95  	// Multiply for trailing zeros.
    96  	for ; p < end; p++ {
    97  		n *= 10
    98  	}
    99  	return n
   100  }
   101  
   102  // MatchDigits computes the plural form for the given language and the given
   103  // decimal floating point digits. The digits are stored in big-endian order and
   104  // are of value byte(0) - byte(9). The floating point position is indicated by
   105  // exp and the number of visible decimals is scale. All leading and trailing
   106  // zeros may be omitted from digits.
   107  //
   108  // The following table contains examples of possible arguments to represent
   109  // the given numbers.
   110  //      decimal    digits              exp    scale
   111  //      123        []byte{1, 2, 3}     3      0
   112  //      123.4      []byte{1, 2, 3, 4}  3      1
   113  //      123.40     []byte{1, 2, 3, 4}  3      2
   114  //      100000     []byte{1}           6      0
   115  //      100000.00  []byte{1}           6      3
   116  func (p *Rules) MatchDigits(t language.Tag, digits []byte, exp, scale int) Form {
   117  	index := tagToID(t)
   118  
   119  	// Differentiate up to including mod 1000000 for the integer part.
   120  	n := getIntApprox(digits, 0, exp, 6, 1000000)
   121  
   122  	// Differentiate up to including mod 100 for the fractional part.
   123  	f := getIntApprox(digits, exp, exp+scale, 2, 100)
   124  
   125  	return matchPlural(p, index, n, f, scale)
   126  }
   127  
   128  func (p *Rules) matchDisplayDigits(t language.Tag, d *number.Digits) (Form, int) {
   129  	n := getIntApprox(d.Digits, 0, int(d.Exp), 6, 1000000)
   130  	return p.MatchDigits(t, d.Digits, int(d.Exp), d.NumFracDigits()), n
   131  }
   132  
   133  func validForms(p *Rules, t language.Tag) (forms []Form) {
   134  	offset := p.langToIndex[tagToID(t)]
   135  	rules := p.rules[p.index[offset]:p.index[offset+1]]
   136  
   137  	forms = append(forms, Other)
   138  	last := Other
   139  	for _, r := range rules {
   140  		if cat := Form(r.cat & formMask); cat != andNext && last != cat {
   141  			forms = append(forms, cat)
   142  			last = cat
   143  		}
   144  	}
   145  	return forms
   146  }
   147  
   148  func (p *Rules) matchComponents(t language.Tag, n, f, scale int) Form {
   149  	return matchPlural(p, tagToID(t), n, f, scale)
   150  }
   151  
   152  // MatchPlural returns the plural form for the given language and plural
   153  // operands (as defined in
   154  // https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules):
   155  //  where
   156  //  	n  absolute value of the source number (integer and decimals)
   157  //  input
   158  //  	i  integer digits of n.
   159  //  	v  number of visible fraction digits in n, with trailing zeros.
   160  //  	w  number of visible fraction digits in n, without trailing zeros.
   161  //  	f  visible fractional digits in n, with trailing zeros (f = t * 10^(v-w))
   162  //  	t  visible fractional digits in n, without trailing zeros.
   163  //
   164  // If any of the operand values is too large to fit in an int, it is okay to
   165  // pass the value modulo 10,000,000.
   166  func (p *Rules) MatchPlural(lang language.Tag, i, v, w, f, t int) Form {
   167  	return matchPlural(p, tagToID(lang), i, f, v)
   168  }
   169  
   170  func matchPlural(p *Rules, index compact.ID, n, f, v int) Form {
   171  	nMask := p.inclusionMasks[n%maxMod]
   172  	// Compute the fMask inline in the rules below, as it is relatively rare.
   173  	// fMask := p.inclusionMasks[f%maxMod]
   174  	vMask := p.inclusionMasks[v%maxMod]
   175  
   176  	// Do the matching
   177  	offset := p.langToIndex[index]
   178  	rules := p.rules[p.index[offset]:p.index[offset+1]]
   179  	for i := 0; i < len(rules); i++ {
   180  		rule := rules[i]
   181  		setBit := uint64(1 << rule.setID)
   182  		var skip bool
   183  		switch op := opID(rule.cat >> opShift); op {
   184  		case opI: // i = x
   185  			skip = n >= numN || nMask&setBit == 0
   186  
   187  		case opI | opNotEqual: // i != x
   188  			skip = n < numN && nMask&setBit != 0
   189  
   190  		case opI | opMod: // i % m = x
   191  			skip = nMask&setBit == 0
   192  
   193  		case opI | opMod | opNotEqual: // i % m != x
   194  			skip = nMask&setBit != 0
   195  
   196  		case opN: // n = x
   197  			skip = f != 0 || n >= numN || nMask&setBit == 0
   198  
   199  		case opN | opNotEqual: // n != x
   200  			skip = f == 0 && n < numN && nMask&setBit != 0
   201  
   202  		case opN | opMod: // n % m = x
   203  			skip = f != 0 || nMask&setBit == 0
   204  
   205  		case opN | opMod | opNotEqual: // n % m != x
   206  			skip = f == 0 && nMask&setBit != 0
   207  
   208  		case opF: // f = x
   209  			skip = f >= numN || p.inclusionMasks[f%maxMod]&setBit == 0
   210  
   211  		case opF | opNotEqual: // f != x
   212  			skip = f < numN && p.inclusionMasks[f%maxMod]&setBit != 0
   213  
   214  		case opF | opMod: // f % m = x
   215  			skip = p.inclusionMasks[f%maxMod]&setBit == 0
   216  
   217  		case opF | opMod | opNotEqual: // f % m != x
   218  			skip = p.inclusionMasks[f%maxMod]&setBit != 0
   219  
   220  		case opV: // v = x
   221  			skip = v < numN && vMask&setBit == 0
   222  
   223  		case opV | opNotEqual: // v != x
   224  			skip = v < numN && vMask&setBit != 0
   225  
   226  		case opW: // w == 0
   227  			skip = f != 0
   228  
   229  		case opW | opNotEqual: // w != 0
   230  			skip = f == 0
   231  
   232  		// Hard-wired rules that cannot be handled by our algorithm.
   233  
   234  		case opBretonM:
   235  			skip = f != 0 || n == 0 || n%1000000 != 0
   236  
   237  		case opAzerbaijan00s:
   238  			// 100,200,300,400,500,600,700,800,900
   239  			skip = n == 0 || n >= 1000 || n%100 != 0
   240  
   241  		case opItalian800:
   242  			skip = (f != 0 || n >= numN || nMask&setBit == 0) && n != 800
   243  		}
   244  		if skip {
   245  			// advance over AND entries.
   246  			for ; i < len(rules) && rules[i].cat&formMask == andNext; i++ {
   247  			}
   248  			continue
   249  		}
   250  		// return if we have a final entry.
   251  		if cat := rule.cat & formMask; cat != andNext {
   252  			return Form(cat)
   253  		}
   254  	}
   255  	return Other
   256  }
   257  
   258  func tagToID(t language.Tag) compact.ID {
   259  	id, _ := compact.RegionalID(compact.Tag(t))
   260  	return id
   261  }