github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/golang/text/internal/number/gen_plural.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 package main 8 9 // This file generates data for the CLDR plural rules, as defined in 10 // http://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules 11 // 12 // We assume a slightly simplified grammar: 13 // 14 // condition = and_condition ('or' and_condition)* samples 15 // and_condition = relation ('and' relation)* 16 // relation = expr ('=' | '!=') range_list 17 // expr = operand ('%' '10' '0'* )? 18 // operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w' 19 // range_list = (range | value) (',' range_list)* 20 // range = value'..'value 21 // value = digit+ 22 // digit = 0|1|2|3|4|5|6|7|8|9 23 // 24 // samples = ('@integer' sampleList)? 25 // ('@decimal' sampleList)? 26 // sampleList = sampleRange (',' sampleRange)* (',' ('…'|'...'))? 27 // sampleRange = decimalValue ('~' decimalValue)? 28 // decimalValue = value ('.' value)? 29 // 30 // Symbol Value 31 // n absolute value of the source number (integer and decimals). 32 // i integer digits of n. 33 // v number of visible fraction digits in n, with trailing zeros. 34 // w number of visible fraction digits in n, without trailing zeros. 35 // f visible fractional digits in n, with trailing zeros. 36 // t visible fractional digits in n, without trailing zeros. 37 // 38 // The algorithm for which the data is generated is based on the following 39 // observations 40 // 41 // - the number of different sets of numbers which the plural rules use to 42 // test inclusion is limited, 43 // - most numbers that are tested on are < 100 44 // 45 // This allows us to define a bitmap for each number < 100 where a bit i 46 // indicates whether this number is included in some defined set i. 47 // The function matchPlural in plural.go defines how we can subsequently use 48 // this data to determine inclusion. 49 // 50 // There are a few languages for which this doesn't work. For one Italian and 51 // Azerbaijan, which both test against numbers > 100 for ordinals and Breton, 52 // which considers whether numbers are multiples of hundreds. The model here 53 // could be extended to handle Italian and Azerbaijan fairly easily (by 54 // considering the numbers 100, 200, 300, ..., 800, 900 in addition to the first 55 // 100), but for now it seems easier to just hard-code these cases. 56 57 import ( 58 "bufio" 59 "bytes" 60 "fmt" 61 "log" 62 "strconv" 63 "strings" 64 65 "github.com/insionng/yougam/libraries/x/text/internal" 66 "github.com/insionng/yougam/libraries/x/text/internal/format/plural" 67 "github.com/insionng/yougam/libraries/x/text/internal/gen" 68 "github.com/insionng/yougam/libraries/x/text/language" 69 "github.com/insionng/yougam/libraries/x/text/unicode/cldr" 70 ) 71 72 type pluralTest struct { 73 locales string // space-separated list of locales for this test 74 form plural.Form 75 integer []string // Entries of the form \d+ or \d+~\d+ 76 decimal []string // Entries of the form \f+ or \f+ +~\f+, where f is \d+\.\d+ 77 } 78 79 func genPluralsTests(w *gen.CodeWriter, data *cldr.CLDR) { 80 w.WriteType(pluralTest{}) 81 82 for _, plurals := range data.Supplemental().Plurals { 83 if plurals.Type == "" { 84 // The empty type is reserved for plural ranges. 85 continue 86 } 87 tests := []pluralTest{} 88 89 for _, pRules := range plurals.PluralRules { 90 for _, rule := range pRules.PluralRule { 91 test := pluralTest{ 92 locales: pRules.Locales, 93 form: countMap[rule.Count], 94 } 95 scan := bufio.NewScanner(strings.NewReader(rule.Data())) 96 scan.Split(splitTokens) 97 var p *[]string 98 for scan.Scan() { 99 switch t := scan.Text(); t { 100 case "@integer": 101 p = &test.integer 102 case "@decimal": 103 p = &test.decimal 104 case ",", "…": 105 default: 106 if p != nil { 107 *p = append(*p, t) 108 } 109 } 110 } 111 tests = append(tests, test) 112 } 113 } 114 w.WriteVar(plurals.Type+"Tests", tests) 115 } 116 } 117 118 func genPlurals(w *gen.CodeWriter, data *cldr.CLDR) { 119 for _, plurals := range data.Supplemental().Plurals { 120 if plurals.Type == "" { 121 continue 122 } 123 // Initialize setMap and inclusionMasks. They are already populated with 124 // a few entries to serve as an example and to assign nice numbers to 125 // common cases. 126 127 // setMap contains sets of numbers represented by boolean arrays where 128 // a true value for element i means that the number i is included. 129 setMap := map[[numN]bool]int{ 130 // The above init func adds an entry for including all numbers. 131 [numN]bool{1: true}: 1, // fix {1} to a nice value 132 [numN]bool{2: true}: 2, // fix {2} to a nice value 133 [numN]bool{0: true}: 3, // fix {0} to a nice value 134 } 135 136 // inclusionMasks contains bit masks for every number under numN to 137 // indicate in which set the number is included. Bit 1 << x will be set 138 // if it is included in set x. 139 inclusionMasks := [numN]uint64{ 140 // Note: these entries are not complete: more bits will be set along the way. 141 0: 1 << 3, 142 1: 1 << 1, 143 2: 1 << 2, 144 } 145 146 // Create set {0..99}. We will assign this set the identifier 0. 147 var all [numN]bool 148 for i := range all { 149 // Mark number i as being included in the set (which has identifier 0). 150 inclusionMasks[i] |= 1 << 0 151 // Mark number i as included in the set. 152 all[i] = true 153 } 154 // Register the identifier for the set. 155 setMap[all] = 0 156 157 rules := []pluralCheck{} 158 index := []byte{0} 159 langMap := map[int]byte{0: 0} // From compact language index to index 160 161 for _, pRules := range plurals.PluralRules { 162 // Parse the rules. 163 var conds []orCondition 164 for _, rule := range pRules.PluralRule { 165 form := countMap[rule.Count] 166 conds = parsePluralCondition(conds, rule.Data(), form) 167 } 168 // Encode the rules. 169 for _, c := range conds { 170 // If an or condition only has filters, we create an entry for 171 // this filter and the set that contains all values. 172 empty := true 173 for _, b := range c.used { 174 empty = empty && !b 175 } 176 if empty { 177 rules = append(rules, pluralCheck{ 178 cat: byte(opMod<<opShift) | byte(c.form), 179 setID: 0, // all values 180 }) 181 continue 182 } 183 // We have some entries with values. 184 for i, set := range c.set { 185 if !c.used[i] { 186 continue 187 } 188 index, ok := setMap[set] 189 if !ok { 190 index = len(setMap) 191 setMap[set] = index 192 for i := range inclusionMasks { 193 if set[i] { 194 inclusionMasks[i] |= 1 << uint64(index) 195 } 196 } 197 } 198 rules = append(rules, pluralCheck{ 199 cat: byte(i<<opShift | andNext), 200 setID: byte(index), 201 }) 202 } 203 // Now set the last entry to the plural form the rule matches. 204 rules[len(rules)-1].cat &^= formMask 205 rules[len(rules)-1].cat |= byte(c.form) 206 } 207 // Point the relevant locales to the created entries. 208 for _, loc := range strings.Split(pRules.Locales, " ") { 209 if strings.TrimSpace(loc) == "" { 210 continue 211 } 212 lang, ok := language.CompactIndex(language.MustParse(loc)) 213 if !ok { 214 log.Printf("No compact index for locale %q", loc) 215 } 216 langMap[lang] = byte(len(index) - 1) 217 } 218 index = append(index, byte(len(rules))) 219 } 220 w.WriteVar(plurals.Type+"Rules", rules) 221 w.WriteVar(plurals.Type+"Index", index) 222 // Expand the values. 223 langToIndex := make([]byte, language.NumCompactTags) 224 for i := range langToIndex { 225 for p := i; ; p = int(internal.Parent[p]) { 226 if x, ok := langMap[p]; ok { 227 langToIndex[i] = x 228 break 229 } 230 } 231 } 232 w.WriteVar(plurals.Type+"LangToIndex", langToIndex) 233 // Need to convert array to slice because of yougam/libraries/issue/7651. 234 // This will allow tables to be dropped when unused. This is especially 235 // relevant for the ordinal data, which I suspect won't be used as much. 236 w.WriteVar(plurals.Type+"InclusionMasks", inclusionMasks[:]) 237 238 if len(rules) > 0xFF { 239 log.Fatalf("Too many entries for rules: %#x", len(rules)) 240 } 241 if len(index) > 0xFF { 242 log.Fatalf("Too many entries for index: %#x", len(index)) 243 } 244 if len(setMap) > 64 { // maximum number of bits. 245 log.Fatalf("Too many entries for setMap: %d", len(setMap)) 246 } 247 w.WriteComment( 248 "Slots used for %s: %X of 0xFF rules; %X of 0xFF indexes; %d of 64 sets", 249 plurals.Type, len(rules), len(index), len(setMap)) 250 // Prevent comment from attaching to the next entry. 251 fmt.Fprint(w, "\n\n") 252 } 253 } 254 255 type orCondition struct { 256 original string // for debugging 257 258 form plural.Form 259 used [32]bool 260 set [32][numN]bool 261 } 262 263 func (o *orCondition) add(op opID, mod int, v []int) (ok bool) { 264 ok = true 265 for _, x := range v { 266 if x >= maxMod { 267 ok = false 268 break 269 } 270 } 271 for i := 0; i < numN; i++ { 272 m := i 273 if mod != 0 { 274 m = i % mod 275 } 276 if !intIn(m, v) { 277 o.set[op][i] = false 278 } 279 } 280 if ok { 281 o.used[op] = true 282 } 283 return ok 284 } 285 286 func intIn(x int, a []int) bool { 287 for _, y := range a { 288 if x == y { 289 return true 290 } 291 } 292 return false 293 } 294 295 var operandIndex = map[string]opID{ 296 "i": opI, 297 "n": opN, 298 "f": opF, 299 "v": opV, 300 "w": opW, 301 } 302 303 // parsePluralCondition parses the condition of a single pluralRule and appends 304 // the resulting or conditions to conds. 305 // 306 // Example rules: 307 // // Category "one" in English: only allow 1 with no visible fraction 308 // i = 1 and v = 0 @integer 1 309 // 310 // // Category "few" in Czech: all numbers with visible fractions 311 // v != 0 @decimal ... 312 // 313 // // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or 314 // // numbers with a fraction 11..19 and no trailing zeros. 315 // n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ... 316 // 317 // @integer and @decimal are followed by examples and are not relevant for the 318 // rule itself. The are used here to signal the termination of the rule. 319 func parsePluralCondition(conds []orCondition, s string, f plural.Form) []orCondition { 320 scan := bufio.NewScanner(strings.NewReader(s)) 321 scan.Split(splitTokens) 322 for { 323 cond := orCondition{original: s, form: f} 324 // Set all numbers to be allowed for all number classes and restrict 325 // from here on. 326 for i := range cond.set { 327 for j := range cond.set[i] { 328 cond.set[i][j] = true 329 } 330 } 331 andLoop: 332 for { 333 var token string 334 scan.Scan() // Must exist. 335 switch class := scan.Text(); class { 336 case "t": 337 class = "w" // equal to w for t == 0 338 fallthrough 339 case "n", "i", "f", "v", "w": 340 op := scanToken(scan) 341 opCode := operandIndex[class] 342 mod := 0 343 if op == "%" { 344 opCode |= opMod 345 346 switch v := scanUint(scan); v { 347 case 10, 100: 348 mod = v 349 case 1000: 350 // A more general solution would be to allow checking 351 // against multiples of 100 and include entries for the 352 // numbers 100..900 in the inclusion masks. At the 353 // moment this would only help Azerbaijan and Italian. 354 355 // Italian doesn't use '%', so this must be Azerbaijan. 356 cond.used[opAzerbaijan00s] = true 357 return append(conds, cond) 358 359 case 1000000: 360 cond.used[opBretonM] = true 361 return append(conds, cond) 362 363 default: 364 log.Fatalf("Modulo value not supported %d", v) 365 } 366 op = scanToken(scan) 367 } 368 if op != "=" && op != "!=" { 369 log.Fatalf("Unexpected op %q", op) 370 } 371 if op == "!=" { 372 opCode |= opNotEqual 373 } 374 a := []int{} 375 v := scanUint(scan) 376 if class == "w" && v != 0 { 377 log.Fatalf("Must compare against zero for operand type %q", class) 378 } 379 token = scanToken(scan) 380 for { 381 switch token { 382 case "..": 383 end := scanUint(scan) 384 for ; v <= end; v++ { 385 a = append(a, v) 386 } 387 token = scanToken(scan) 388 default: // ",", "or", "and", "@..." 389 a = append(a, v) 390 } 391 if token != "," { 392 break 393 } 394 v = scanUint(scan) 395 token = scanToken(scan) 396 } 397 if !cond.add(opCode, mod, a) { 398 // Detected large numbers. As we ruled out Azerbaijan, this 399 // must be the many rule for Italian ordinals. 400 cond.set[opItalian800] = cond.set[opN] 401 cond.used[opItalian800] = true 402 } 403 404 case "@integer", "@decimal": // "other" entry: tests only. 405 return conds 406 default: 407 log.Fatalf("Unexpected operand class %q (%s)", class, s) 408 } 409 switch token { 410 case "or": 411 conds = append(conds, cond) 412 break andLoop 413 case "@integer", "@decimal": // examples 414 // There is always an example in practice, so we always terminate here. 415 if err := scan.Err(); err != nil { 416 log.Fatal(err) 417 } 418 return append(conds, cond) 419 case "and": 420 // keep accumulating 421 default: 422 log.Fatalf("Unexpected token %q", token) 423 } 424 } 425 } 426 } 427 428 func scanToken(scan *bufio.Scanner) string { 429 scan.Scan() 430 return scan.Text() 431 } 432 433 func scanUint(scan *bufio.Scanner) int { 434 scan.Scan() 435 val, err := strconv.ParseUint(scan.Text(), 10, 32) 436 if err != nil { 437 log.Fatal(err) 438 } 439 return int(val) 440 } 441 442 // splitTokens can be used with bufio.Scanner to tokenize CLDR plural rules. 443 func splitTokens(data []byte, atEOF bool) (advance int, token []byte, err error) { 444 condTokens := [][]byte{ 445 []byte(".."), 446 []byte(","), 447 []byte("!="), 448 []byte("="), 449 } 450 advance, token, err = bufio.ScanWords(data, atEOF) 451 for _, t := range condTokens { 452 if len(t) >= len(token) { 453 continue 454 } 455 switch p := bytes.Index(token, t); { 456 case p == -1: 457 case p == 0: 458 advance = len(t) 459 token = token[:len(t)] 460 return advance - len(token) + len(t), token[:len(t)], err 461 case p < advance: 462 // Don't split when "=" overlaps "!=". 463 if t[0] == '=' && token[p-1] == '!' { 464 continue 465 } 466 advance = p 467 token = token[:p] 468 } 469 } 470 return advance, token, err 471 }