github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/golang/text/internal/number/gen_plural.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build ignore
     6  
     7  package main
     8  
     9  // This file generates data for the CLDR plural rules, as defined in
    10  //    http://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
    11  //
    12  // We assume a slightly simplified grammar:
    13  //
    14  // 		condition     = and_condition ('or' and_condition)* samples
    15  // 		and_condition = relation ('and' relation)*
    16  // 		relation      = expr ('=' | '!=') range_list
    17  // 		expr          = operand ('%' '10' '0'* )?
    18  // 		operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
    19  // 		range_list    = (range | value) (',' range_list)*
    20  // 		range         = value'..'value
    21  // 		value         = digit+
    22  // 		digit         = 0|1|2|3|4|5|6|7|8|9
    23  //
    24  // 		samples       = ('@integer' sampleList)?
    25  // 		                ('@decimal' sampleList)?
    26  // 		sampleList    = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
    27  // 		sampleRange   = decimalValue ('~' decimalValue)?
    28  // 		decimalValue  = value ('.' value)?
    29  //
    30  //		Symbol	Value
    31  //		n	absolute value of the source number (integer and decimals).
    32  //		i	integer digits of n.
    33  //		v	number of visible fraction digits in n, with trailing zeros.
    34  //		w	number of visible fraction digits in n, without trailing zeros.
    35  //		f	visible fractional digits in n, with trailing zeros.
    36  //		t	visible fractional digits in n, without trailing zeros.
    37  //
    38  // The algorithm for which the data is generated is based on the following
    39  // observations
    40  //
    41  //    - the number of different sets of numbers which the plural rules use to
    42  //      test inclusion is limited,
    43  //    - most numbers that are tested on are < 100
    44  //
    45  // This allows us to define a bitmap for each number < 100 where a bit i
    46  // indicates whether this number is included in some defined set i.
    47  // The function matchPlural in plural.go defines how we can subsequently use
    48  // this data to determine inclusion.
    49  //
// There are a few languages for which this doesn't work: Italian and
// Azerbaijani, which both test against numbers > 100 for ordinals, and Breton,
// which considers whether numbers are multiples of hundreds. The model here
// could be extended to handle Italian and Azerbaijani fairly easily (by
// considering the numbers 100, 200, 300, ..., 800, 900 in addition to the first
// 100), but for now it seems easier to just hard-code these cases.
    56  
    57  import (
    58  	"bufio"
    59  	"bytes"
    60  	"fmt"
    61  	"log"
    62  	"strconv"
    63  	"strings"
    64  
    65  	"github.com/insionng/yougam/libraries/x/text/internal"
    66  	"github.com/insionng/yougam/libraries/x/text/internal/format/plural"
    67  	"github.com/insionng/yougam/libraries/x/text/internal/gen"
    68  	"github.com/insionng/yougam/libraries/x/text/language"
    69  	"github.com/insionng/yougam/libraries/x/text/unicode/cldr"
    70  )
    71  
// pluralTest holds the sample values attached to a single plural rule.
// genPluralsTests fills one pluralTest per rule: form is looked up in countMap
// using the rule's count, and the tokens that follow the "@integer" and
// "@decimal" markers in the rule data are collected in integer and decimal
// respectively.
type pluralTest struct {
	locales string // space-separated list of locales for this test
	form    plural.Form
	integer []string // Entries of the form \d+ or \d+~\d+
	decimal []string // Entries of the form f or f~f, where f is \d+\.\d+
}
    78  
    79  func genPluralsTests(w *gen.CodeWriter, data *cldr.CLDR) {
    80  	w.WriteType(pluralTest{})
    81  
    82  	for _, plurals := range data.Supplemental().Plurals {
    83  		if plurals.Type == "" {
    84  			// The empty type is reserved for plural ranges.
    85  			continue
    86  		}
    87  		tests := []pluralTest{}
    88  
    89  		for _, pRules := range plurals.PluralRules {
    90  			for _, rule := range pRules.PluralRule {
    91  				test := pluralTest{
    92  					locales: pRules.Locales,
    93  					form:    countMap[rule.Count],
    94  				}
    95  				scan := bufio.NewScanner(strings.NewReader(rule.Data()))
    96  				scan.Split(splitTokens)
    97  				var p *[]string
    98  				for scan.Scan() {
    99  					switch t := scan.Text(); t {
   100  					case "@integer":
   101  						p = &test.integer
   102  					case "@decimal":
   103  						p = &test.decimal
   104  					case ",", "…":
   105  					default:
   106  						if p != nil {
   107  							*p = append(*p, t)
   108  						}
   109  					}
   110  				}
   111  				tests = append(tests, test)
   112  			}
   113  		}
   114  		w.WriteVar(plurals.Type+"Tests", tests)
   115  	}
   116  }
   117  
   118  func genPlurals(w *gen.CodeWriter, data *cldr.CLDR) {
   119  	for _, plurals := range data.Supplemental().Plurals {
   120  		if plurals.Type == "" {
   121  			continue
   122  		}
   123  		// Initialize setMap and inclusionMasks. They are already populated with
   124  		// a few entries to serve as an example and to assign nice numbers to
   125  		// common cases.
   126  
   127  		// setMap contains sets of numbers represented by boolean arrays where
   128  		// a true value for element i means that the number i is included.
   129  		setMap := map[[numN]bool]int{
   130  			// The above init func adds an entry for including all numbers.
   131  			[numN]bool{1: true}: 1, // fix {1} to a nice value
   132  			[numN]bool{2: true}: 2, // fix {2} to a nice value
   133  			[numN]bool{0: true}: 3, // fix {0} to a nice value
   134  		}
   135  
   136  		// inclusionMasks contains bit masks for every number under numN to
   137  		// indicate in which set the number is included. Bit 1 << x will be set
   138  		// if it is included in set x.
   139  		inclusionMasks := [numN]uint64{
   140  			// Note: these entries are not complete: more bits will be set along the way.
   141  			0: 1 << 3,
   142  			1: 1 << 1,
   143  			2: 1 << 2,
   144  		}
   145  
   146  		// Create set {0..99}. We will assign this set the identifier 0.
   147  		var all [numN]bool
   148  		for i := range all {
   149  			// Mark number i as being included in the set (which has identifier 0).
   150  			inclusionMasks[i] |= 1 << 0
   151  			// Mark number i as included in the set.
   152  			all[i] = true
   153  		}
   154  		// Register the identifier for the set.
   155  		setMap[all] = 0
   156  
   157  		rules := []pluralCheck{}
   158  		index := []byte{0}
   159  		langMap := map[int]byte{0: 0} // From compact language index to index
   160  
   161  		for _, pRules := range plurals.PluralRules {
   162  			// Parse the rules.
   163  			var conds []orCondition
   164  			for _, rule := range pRules.PluralRule {
   165  				form := countMap[rule.Count]
   166  				conds = parsePluralCondition(conds, rule.Data(), form)
   167  			}
   168  			// Encode the rules.
   169  			for _, c := range conds {
   170  				// If an or condition only has filters, we create an entry for
   171  				// this filter and the set that contains all values.
   172  				empty := true
   173  				for _, b := range c.used {
   174  					empty = empty && !b
   175  				}
   176  				if empty {
   177  					rules = append(rules, pluralCheck{
   178  						cat:   byte(opMod<<opShift) | byte(c.form),
   179  						setID: 0, // all values
   180  					})
   181  					continue
   182  				}
   183  				// We have some entries with values.
   184  				for i, set := range c.set {
   185  					if !c.used[i] {
   186  						continue
   187  					}
   188  					index, ok := setMap[set]
   189  					if !ok {
   190  						index = len(setMap)
   191  						setMap[set] = index
   192  						for i := range inclusionMasks {
   193  							if set[i] {
   194  								inclusionMasks[i] |= 1 << uint64(index)
   195  							}
   196  						}
   197  					}
   198  					rules = append(rules, pluralCheck{
   199  						cat:   byte(i<<opShift | andNext),
   200  						setID: byte(index),
   201  					})
   202  				}
   203  				// Now set the last entry to the plural form the rule matches.
   204  				rules[len(rules)-1].cat &^= formMask
   205  				rules[len(rules)-1].cat |= byte(c.form)
   206  			}
   207  			// Point the relevant locales to the created entries.
   208  			for _, loc := range strings.Split(pRules.Locales, " ") {
   209  				if strings.TrimSpace(loc) == "" {
   210  					continue
   211  				}
   212  				lang, ok := language.CompactIndex(language.MustParse(loc))
   213  				if !ok {
   214  					log.Printf("No compact index for locale %q", loc)
   215  				}
   216  				langMap[lang] = byte(len(index) - 1)
   217  			}
   218  			index = append(index, byte(len(rules)))
   219  		}
   220  		w.WriteVar(plurals.Type+"Rules", rules)
   221  		w.WriteVar(plurals.Type+"Index", index)
   222  		// Expand the values.
   223  		langToIndex := make([]byte, language.NumCompactTags)
   224  		for i := range langToIndex {
   225  			for p := i; ; p = int(internal.Parent[p]) {
   226  				if x, ok := langMap[p]; ok {
   227  					langToIndex[i] = x
   228  					break
   229  				}
   230  			}
   231  		}
   232  		w.WriteVar(plurals.Type+"LangToIndex", langToIndex)
   233  		// Need to convert array to slice because of yougam/libraries/issue/7651.
   234  		// This will allow tables to be dropped when unused. This is especially
   235  		// relevant for the ordinal data, which I suspect won't be used as much.
   236  		w.WriteVar(plurals.Type+"InclusionMasks", inclusionMasks[:])
   237  
   238  		if len(rules) > 0xFF {
   239  			log.Fatalf("Too many entries for rules: %#x", len(rules))
   240  		}
   241  		if len(index) > 0xFF {
   242  			log.Fatalf("Too many entries for index: %#x", len(index))
   243  		}
   244  		if len(setMap) > 64 { // maximum number of bits.
   245  			log.Fatalf("Too many entries for setMap: %d", len(setMap))
   246  		}
   247  		w.WriteComment(
   248  			"Slots used for %s: %X of 0xFF rules; %X of 0xFF indexes; %d of 64 sets",
   249  			plurals.Type, len(rules), len(index), len(setMap))
   250  		// Prevent comment from attaching to the next entry.
   251  		fmt.Fprint(w, "\n\n")
   252  	}
   253  }
   254  
// orCondition is one 'or'-separated alternative of a plural rule, as produced
// by parsePluralCondition.
//
// form is the plural form the alternative selects. Both arrays are indexed by
// operand code. set[op][i] tells whether the number i is still allowed for
// op: parsePluralCondition starts with every entry true and add clears the
// entries that a relation rules out. used[op] tells whether the alternative
// has an entry for op; genPlurals only encodes the sets whose used flag is
// true.
type orCondition struct {
	original string // for debugging

	form plural.Form
	used [32]bool
	set  [32][numN]bool
}
   262  
   263  func (o *orCondition) add(op opID, mod int, v []int) (ok bool) {
   264  	ok = true
   265  	for _, x := range v {
   266  		if x >= maxMod {
   267  			ok = false
   268  			break
   269  		}
   270  	}
   271  	for i := 0; i < numN; i++ {
   272  		m := i
   273  		if mod != 0 {
   274  			m = i % mod
   275  		}
   276  		if !intIn(m, v) {
   277  			o.set[op][i] = false
   278  		}
   279  	}
   280  	if ok {
   281  		o.used[op] = true
   282  	}
   283  	return ok
   284  }
   285  
   286  func intIn(x int, a []int) bool {
   287  	for _, y := range a {
   288  		if x == y {
   289  			return true
   290  		}
   291  	}
   292  	return false
   293  }
   294  
   295  var operandIndex = map[string]opID{
   296  	"i": opI,
   297  	"n": opN,
   298  	"f": opF,
   299  	"v": opV,
   300  	"w": opW,
   301  }
   302  
   303  // parsePluralCondition parses the condition of a single pluralRule and appends
   304  // the resulting or conditions to conds.
   305  //
   306  // Example rules:
   307  //   // Category "one" in English: only allow 1 with no visible fraction
   308  //   i = 1 and v = 0 @integer 1
   309  //
   310  //   // Category "few" in Czech: all numbers with visible fractions
   311  //   v != 0   @decimal ...
   312  //
   313  //   // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or
   314  //   // numbers with a fraction 11..19 and no trailing zeros.
   315  //   n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ...
   316  //
   317  // @integer and @decimal are followed by examples and are not relevant for the
   318  // rule itself. The are used here to signal the termination of the rule.
   319  func parsePluralCondition(conds []orCondition, s string, f plural.Form) []orCondition {
   320  	scan := bufio.NewScanner(strings.NewReader(s))
   321  	scan.Split(splitTokens)
   322  	for {
   323  		cond := orCondition{original: s, form: f}
   324  		// Set all numbers to be allowed for all number classes and restrict
   325  		// from here on.
   326  		for i := range cond.set {
   327  			for j := range cond.set[i] {
   328  				cond.set[i][j] = true
   329  			}
   330  		}
   331  	andLoop:
   332  		for {
   333  			var token string
   334  			scan.Scan() // Must exist.
   335  			switch class := scan.Text(); class {
   336  			case "t":
   337  				class = "w" // equal to w for t == 0
   338  				fallthrough
   339  			case "n", "i", "f", "v", "w":
   340  				op := scanToken(scan)
   341  				opCode := operandIndex[class]
   342  				mod := 0
   343  				if op == "%" {
   344  					opCode |= opMod
   345  
   346  					switch v := scanUint(scan); v {
   347  					case 10, 100:
   348  						mod = v
   349  					case 1000:
   350  						// A more general solution would be to allow checking
   351  						// against multiples of 100 and include entries for the
   352  						// numbers 100..900 in the inclusion masks. At the
   353  						// moment this would only help Azerbaijan and Italian.
   354  
   355  						// Italian doesn't use '%', so this must be Azerbaijan.
   356  						cond.used[opAzerbaijan00s] = true
   357  						return append(conds, cond)
   358  
   359  					case 1000000:
   360  						cond.used[opBretonM] = true
   361  						return append(conds, cond)
   362  
   363  					default:
   364  						log.Fatalf("Modulo value not supported %d", v)
   365  					}
   366  					op = scanToken(scan)
   367  				}
   368  				if op != "=" && op != "!=" {
   369  					log.Fatalf("Unexpected op %q", op)
   370  				}
   371  				if op == "!=" {
   372  					opCode |= opNotEqual
   373  				}
   374  				a := []int{}
   375  				v := scanUint(scan)
   376  				if class == "w" && v != 0 {
   377  					log.Fatalf("Must compare against zero for operand type %q", class)
   378  				}
   379  				token = scanToken(scan)
   380  				for {
   381  					switch token {
   382  					case "..":
   383  						end := scanUint(scan)
   384  						for ; v <= end; v++ {
   385  							a = append(a, v)
   386  						}
   387  						token = scanToken(scan)
   388  					default: // ",", "or", "and", "@..."
   389  						a = append(a, v)
   390  					}
   391  					if token != "," {
   392  						break
   393  					}
   394  					v = scanUint(scan)
   395  					token = scanToken(scan)
   396  				}
   397  				if !cond.add(opCode, mod, a) {
   398  					// Detected large numbers. As we ruled out Azerbaijan, this
   399  					// must be the many rule for Italian ordinals.
   400  					cond.set[opItalian800] = cond.set[opN]
   401  					cond.used[opItalian800] = true
   402  				}
   403  
   404  			case "@integer", "@decimal": // "other" entry: tests only.
   405  				return conds
   406  			default:
   407  				log.Fatalf("Unexpected operand class %q (%s)", class, s)
   408  			}
   409  			switch token {
   410  			case "or":
   411  				conds = append(conds, cond)
   412  				break andLoop
   413  			case "@integer", "@decimal": // examples
   414  				// There is always an example in practice, so we always terminate here.
   415  				if err := scan.Err(); err != nil {
   416  					log.Fatal(err)
   417  				}
   418  				return append(conds, cond)
   419  			case "and":
   420  				// keep accumulating
   421  			default:
   422  				log.Fatalf("Unexpected token %q", token)
   423  			}
   424  		}
   425  	}
   426  }
   427  
   428  func scanToken(scan *bufio.Scanner) string {
   429  	scan.Scan()
   430  	return scan.Text()
   431  }
   432  
   433  func scanUint(scan *bufio.Scanner) int {
   434  	scan.Scan()
   435  	val, err := strconv.ParseUint(scan.Text(), 10, 32)
   436  	if err != nil {
   437  		log.Fatal(err)
   438  	}
   439  	return int(val)
   440  }
   441  
   442  // splitTokens can be used with bufio.Scanner to tokenize CLDR plural rules.
   443  func splitTokens(data []byte, atEOF bool) (advance int, token []byte, err error) {
   444  	condTokens := [][]byte{
   445  		[]byte(".."),
   446  		[]byte(","),
   447  		[]byte("!="),
   448  		[]byte("="),
   449  	}
   450  	advance, token, err = bufio.ScanWords(data, atEOF)
   451  	for _, t := range condTokens {
   452  		if len(t) >= len(token) {
   453  			continue
   454  		}
   455  		switch p := bytes.Index(token, t); {
   456  		case p == -1:
   457  		case p == 0:
   458  			advance = len(t)
   459  			token = token[:len(t)]
   460  			return advance - len(token) + len(t), token[:len(t)], err
   461  		case p < advance:
   462  			// Don't split when "=" overlaps "!=".
   463  			if t[0] == '=' && token[p-1] == '!' {
   464  				continue
   465  			}
   466  			advance = p
   467  			token = token[:p]
   468  		}
   469  	}
   470  	return advance, token, err
   471  }