// Source: github.com/altipla-consulting/ravendb-go-client@v0.1.3/inflect.go

package ravendb

// this is inflect.go from https://github.com/kjk/inflect
// included directly to minimize dependencies
// under MIT license: https://github.com/kjk/inflect/blob/master/LICENSE

import (
	"regexp"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)

// irregularRules lists words whose plural cannot be derived by a regex rule.
// Each entry is {singular, plural}. addIrregularRules lower-cases both forms
// and stores them in irregularSingles (singular -> plural) and
// irregularPlurals (plural -> singular).
var irregularRules = [][]string{
	// Pronouns.
	{"I", "we"},
	{"me", "us"},
	{"he", "they"},
	{"she", "they"},
	{"them", "them"},
	{"myself", "ourselves"},
	{"yourself", "yourselves"},
	{"itself", "themselves"},
	{"herself", "themselves"},
	{"himself", "themselves"},
	{"themself", "themselves"},
	{"is", "are"},
	{"was", "were"},
	{"has", "have"},
	{"this", "these"},
	{"that", "those"},
	// Words ending with a consonant and `o`.
	{"echo", "echoes"},
	{"dingo", "dingoes"},
	{"volcano", "volcanoes"},
	{"tornado", "tornadoes"},
	{"torpedo", "torpedoes"},
	// Ends with `us`.
	{"genus", "genera"},
	{"viscus", "viscera"},
	// Ends with `ma`.
	{"stigma", "stigmata"},
	{"stoma", "stomata"},
	{"dogma", "dogmata"},
	{"lemma", "lemmata"},
	{"schema", "schemata"},
	{"anathema", "anathemata"},
	// Other irregular rules.
	{"ox", "oxen"},
	{"axe", "axes"},
	{"die", "dice"},
	{"yes", "yeses"},
	{"foot", "feet"},
	{"eave", "eaves"},
	{"goose", "geese"},
	{"tooth", "teeth"},
	{"quiz", "quizzes"},
	{"human", "humans"},
	{"proof", "proofs"},
	{"carve", "carves"},
	{"valve", "valves"},
	{"looey", "looies"},
	{"thief", "thieves"},
	{"groove", "grooves"},
	{"pickaxe", "pickaxes"},
	{"whiskey", "whiskies"},
}

// pluralizationRules lists {pattern, replacement} pairs used to pluralize a
// word. A pattern is either a JavaScript regex literal (`/.../` with an
// optional trailing `i` flag) or a plain string (e.g. `thou`); sanitizeRule
// converts both to Go regexps. Replacements use the JavaScript-style
// references $0, $1 and $2, rewritten by jsReplaceSyntaxToGo.
//
// Order matters: sanitizeWord scans the compiled rules from the last entry to
// the first and applies the first one that matches, so later entries take
// priority over earlier ones.
var pluralizationRules = [][]string{
	{`/s?$/i`, `s`},
	{`/[^\u0000-\u007F]$/i`, `$0`},
	{`/([^aeiou]ese)$/i`, `$1`},
	{`/(ax|test)is$/i`, `$1es`},
	{`/(alias|[^aou]us|t[lm]as|gas|ris)$/i`, `$1es`},
	{`/(e[mn]u)s?$/i`, `$1s`},
	{`/([^l]ias|[aeiou]las|[ejzr]as|[iu]am)$/i`, `$1`},
	{`/(alumn|syllab|octop|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$/i`, `$1i`},
	{`/(alumn|alg|vertebr)(?:a|ae)$/i`, `$1ae`},
	{`/(seraph|cherub)(?:im)?$/i`, `$1im`},
	{`/(her|at|gr)o$/i`, `$1oes`},
	{`/(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|automat|quor)(?:a|um)$/i`, `$1a`},
	{`/(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)(?:a|on)$/i`, `$1a`},
	{`/sis$/i`, `ses`},
	{`/(?:(kni|wi|li)fe|(ar|l|ea|eo|oa|hoo)f)$/i`, `$1$2ves`},
	{`/([^aeiouy]|qu)y$/i`, `$1ies`},
	{`/([^ch][ieo][ln])ey$/i`, `$1ies`},
	{`/(x|ch|ss|sh|zz)$/i`, `$1es`},
	{`/(matr|cod|mur|sil|vert|ind|append)(?:ix|ex)$/i`, `$1ices`},
	{`/\b((?:tit)?m|l)(?:ice|ouse)$/i`, `$1ice`},
	{`/(pe)(?:rson|ople)$/i`, `$1ople`},
	{`/(child)(?:ren)?$/i`, `$1ren`},
	{`/eaux$/i`, `$0`},
	{`/m[ae]n$/i`, `men`},
	{`thou`, `you`},
}

// singularizationRules lists {pattern, replacement} pairs used to singularize
// a word. The format and the last-entry-wins ordering are the same as for
// pluralizationRules.
var singularizationRules = [][]string{
	{`/s$/i`, ``},
	{`/(ss)$/i`, `$1`},
	{`/(wi|kni|(?:after|half|high|low|mid|non|night|[^\w]|^)li)ves$/i`, `$1fe`},
	{`/(ar|(?:wo|[ae])l|[eo][ao])ves$/i`, `$1f`},
	{`/ies$/i`, `y`},
	{`/\b([pl]|zomb|(?:neck|cross)?t|coll|faer|food|gen|goon|group|lass|talk|goal|cut)ies$/i`, `$1ie`},
	{`/\b(mon|smil)ies$/i`, `$1ey`},
	{`/\b((?:tit)?m|l)ice$/i`, `$1ouse`},
	{`/(seraph|cherub)im$/i`, `$1`},
	{`/(x|ch|ss|sh|zz|tto|go|cho|alias|[^aou]us|t[lm]as|gas|(?:her|at|gr)o|ris)(?:es)?$/i`, `$1`},
	{`/(analy|ba|diagno|parenthe|progno|synop|the|empha|cri)(?:sis|ses)$/i`, `$1sis`},
	{`/(movie|twelve|abuse|e[mn]u)s$/i`, `$1`},
	{`/(test)(?:is|es)$/i`, `$1is`},
	{`/(alumn|syllab|octop|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$/i`, `$1us`},
	{`/(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|quor)a$/i`, `$1um`},
	{`/(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)a$/i`, `$1on`},
	{`/(alumn|alg|vertebr)ae$/i`, `$1a`},
	{`/(cod|mur|sil|vert|ind)ices$/i`, `$1ex`},
	{`/(matr|append)ices$/i`, `$1ix`},
	{`/(pe)(rson|ople)$/i`, `$1rson`},
	{`/(child)ren$/i`, `$1`},
	{`/(eau)x?$/i`, `$1`},
	{`/men$/i`, `man`},
}

// uncountableRules lists words that have the same singular and plural form.
// addUncountableRules stores plain entries (lower-cased) in the uncountables
// map; entries starting with '/' are regexes and are registered as both a
// plural and a singular rule with replacement "$0", i.e. the word is left
// unchanged.
var uncountableRules = []string{
	// singular words with no plurals.
	"adulthood",
	"advice",
	"agenda",
	"aid",
	"alcohol",
	"ammo",
	"anime",
	"athletics",
	"audio",
	"bison",
	"blood",
	"bream",
	"buffalo",
	"butter",
	"carp",
	"cash",
	"chassis",
	"chess",
	"clothing",
	"cod",
	"commerce",
	"cooperation",
	"corps",
	"debris",
	"diabetes",
	"digestion",
	"elk",
	"energy",
	"equipment",
	"excretion",
	"expertise",
	"flounder",
	"fun",
	"gallows",
	"garbage",
	"graffiti",
	"headquarters",
	"health",
	"herpes",
	"highjinks",
	"homework",
	"housework",
	"information",
	"jeans",
	"justice",
	"kudos",
	"labour",
	"literature",
	"machinery",
	"mackerel",
	"mail",
	"media",
	"mews",
	"moose",
	"music",
	"mud",
	"manga",
	"news",
	"pike",
	"plankton",
	"pliers",
	"police",
	"pollution",
	"premises",
	"rain",
	"research",
	"rice",
	"salmon",
	"scissors",
	"series",
	"sewage",
	"shambles",
	"shrimp",
	"species",
	"staff",
	"swine",
	"tennis",
	"traffic",
	"transportation",
	"trout",
	"tuna",
	"wealth",
	"welfare",
	"whiting",
	"wildebeest",
	"wildlife",
	"you",
	// Regexes.
	`/[^aeiou]ese$/i`, // "chinese", "japanese"
	`/deer$/i`,        // "deer", "reindeer"
	`/fish$/i`,        // "fish", "blowfish", "angelfish"
	`/measles$/i`,
	`/o[iu]s$/i`, // "carnivorous"
	`/pox$/i`,    // "chickenpox", "smallpox"
	`/sheep$/i`,
}

// rxRule is one compiled pluralization or singularization rule.
type rxRule struct {
	// TODO: for debugging, maybe remove when working
	rxStrJs string // the rule as written in the tables (JavaScript syntax)
	rxStrGo string // the same rule after conversion to Go regexp syntax

	rx          *regexp.Regexp // compiled form of rxStrGo
	replacement string         // replacement template in Go syntax (${N} references)
}

// Rule storage - pluralize and singularize need to be run sequentially,
// while other rules can be optimized using an object for instant lookups.
235 var pluralRules []rxRule 236 var singularRules []rxRule 237 var irregularPlurals = map[string]string{} 238 var irregularSingles = map[string]string{} 239 var uncountables = map[string]string{} 240 241 func init() { 242 // order is important 243 addIrregularRules() 244 addPluralizationRules() 245 addSingularizationRules() 246 addUncountableRules() 247 } 248 249 // Add a pluralization rule to the collection. 250 func addPluralRule(rule string, replacement string) { 251 rx, rxStrGo := sanitizeRule(rule) 252 r := rxRule{ 253 rxStrJs: rule, 254 rxStrGo: rxStrGo, 255 rx: rx, 256 replacement: jsReplaceSyntaxToGo(replacement), 257 } 258 pluralRules = append(pluralRules, r) 259 } 260 261 var ( 262 unicodeSyntaxRx = regexp.MustCompile(`\\u([[:xdigit:]]{4})`) 263 ) 264 265 // best-effort of converting javascript regex syntax to equivalent go syntax 266 func jsRxSyntaxToGo(rx string) string { 267 s := rx 268 caseInsensitive := false 269 panicIf(s[0] != '/', "expected '%s' to start with '/'", rx) 270 s = s[1:] 271 n := len(s) 272 if s[n-1] == 'i' { 273 n-- 274 caseInsensitive = true 275 s = s[:n] 276 } 277 panicIf(s[n-1] != '/', "expected '%s' to end with '/'", rx) 278 s = s[:n-1] 279 // \uNNNN syntax for unicode code points to \x{NNNN} syntax for hex character code 280 s = unicodeSyntaxRx.ReplaceAllString(s, "\\x{$1}") 281 if caseInsensitive { 282 s = "(?i)" + s 283 } 284 return s 285 } 286 287 func jsReplaceSyntaxToGo(s string) string { 288 s = strings.Replace(s, "$0", "${0}", -1) 289 s = strings.Replace(s, "$1", "${1}", -1) 290 s = strings.Replace(s, "$2", "${2}", -1) 291 return s 292 } 293 294 // Sanitize a pluralization rule to a usable regular expression. 295 func sanitizeRule(rule string) (*regexp.Regexp, string) { 296 // in JavaScript, regexpes start with / 297 // others are just regular strings 298 var s string 299 if rule[0] != '/' { 300 // a plain string match is converted to regexp that: 301 // ^ ... 
$ : does exact match (matches at the beginning and end) 302 // (?i) : is case-insensitive 303 s = `(?i)^` + rule + `$` 304 } else { 305 s = jsRxSyntaxToGo(rule) 306 } 307 return regexp.MustCompile(s), s 308 } 309 310 // Add a singularization rule to the collection. 311 func addSingularRule(rule, replacement string) { 312 rx, rxGo := sanitizeRule(rule) 313 r := rxRule{ 314 rxStrJs: rule, 315 rxStrGo: rxGo, 316 rx: rx, 317 replacement: jsReplaceSyntaxToGo(replacement), 318 } 319 singularRules = append(singularRules, r) 320 } 321 322 // copied from strings.ToUpper 323 // returns true if s is uppercase 324 func isUpper(s string) bool { 325 isASCII, hasLower := true, false 326 for i := 0; i < len(s); i++ { 327 c := s[i] 328 if c >= utf8.RuneSelf { 329 isASCII = false 330 break 331 } 332 hasLower = hasLower || (c >= 'a' && c <= 'z') 333 } 334 if isASCII { 335 return !hasLower 336 } 337 for r := range s { 338 if !unicode.IsUpper(rune(r)) { 339 return false 340 } 341 } 342 return true 343 } 344 345 // Pass in a word token to produce a function that can replicate the case on 346 // another word. 347 func restoreCase(word string, token string) string { 348 // Tokens are an exact match. 349 if word == token { 350 return token 351 } 352 353 // Upper cased words. E.g. "HELLO". 354 if isUpper(word) { 355 return strings.ToUpper(token) 356 } 357 358 // Title cased words. E.g. "Title". 359 prefix := word[:1] 360 if isUpper(prefix) { 361 return strings.ToUpper(token[:1]) + strings.ToLower(token[1:]) 362 } 363 364 // Lower cased words. E.g. "test". 365 return strings.ToLower(token) 366 } 367 368 // Replace a word using a rule. 369 func replace(word string, rule rxRule) string { 370 // TODO: not sure if this covers all possibilities 371 repl := rule.replacement 372 if isUpper(word) { 373 repl = strings.ToUpper(repl) 374 } 375 return rule.rx.ReplaceAllString(word, repl) 376 } 377 378 // Sanitize a word by passing in the word and sanitization rules. 
379 func sanitizeWord(token string, word string, rules []rxRule) string { 380 // Empty string or doesn't need fixing. 381 if len(token) == 0 { 382 return word 383 } 384 if _, ok := uncountables[token]; ok { 385 return word 386 } 387 388 // Iterate over the sanitization rules and use the first one to match. 389 // important that we iterate from the end 390 n := len(rules) 391 for i := n - 1; i >= 0; i-- { 392 rule := rules[i] 393 if rule.rx.MatchString(word) { 394 return replace(word, rule) 395 } 396 } 397 return word 398 } 399 400 // Replace a word with the updated word. 401 func replaceWord(word string, replaceMap map[string]string, keepMap map[string]string, rules []rxRule) string { 402 // Get the correct token and case restoration functions. 403 token := strings.ToLower(word) 404 405 // Check against the keep object map. 406 if _, ok := keepMap[token]; ok { 407 return restoreCase(word, token) 408 } 409 410 // Check against the replacement map for a direct word replacement. 411 if s, ok := replaceMap[token]; ok { 412 return restoreCase(word, s) 413 } 414 415 // Run all the rules against the word. 416 return sanitizeWord(token, word, rules) 417 } 418 419 // Check if a word is part of the map. 420 func checkWord(word string, replaceMap map[string]string, keepMap map[string]string, rules []rxRule) bool { 421 token := strings.ToLower(word) 422 423 if _, ok := keepMap[token]; ok { 424 return true 425 } 426 427 if _, ok := replaceMap[token]; ok { 428 return false 429 } 430 431 return sanitizeWord(token, token, rules) == token 432 } 433 434 // Add an irregular word definition. 
435 func addIrregularRules() { 436 for _, rule := range irregularRules { 437 single := strings.ToLower(rule[0]) 438 plural := strings.ToLower(rule[1]) 439 440 irregularSingles[single] = plural 441 irregularPlurals[plural] = single 442 } 443 } 444 445 func addSingularizationRules() { 446 for _, r := range singularizationRules { 447 addSingularRule(r[0], r[1]) 448 } 449 } 450 451 func addUncountableRules() { 452 for _, word := range uncountableRules { 453 if word[0] != '/' { 454 word = strings.ToLower(word) 455 uncountables[word] = word 456 continue 457 } 458 // Set singular and plural references for the word. 459 addPluralRule(word, "$0") 460 addSingularRule(word, "$0") 461 } 462 } 463 464 func addPluralizationRules() { 465 for _, rule := range pluralizationRules { 466 addPluralRule(rule[0], rule[1]) 467 } 468 } 469 470 // Pluralize or singularize a word based on the passed in count. 471 func Pluralize(word string, count int, inclusive bool) string { 472 var res string 473 if count == 1 { 474 res = ToSingular(word) 475 } else { 476 res = ToPlural(word) 477 } 478 479 if inclusive { 480 return strconv.Itoa(count) + " " + res 481 } 482 return res 483 } 484 485 // IsPlural retruns true if word is plural 486 func IsPlural(word string) bool { 487 return checkWord(word, irregularSingles, irregularPlurals, pluralRules) 488 } 489 490 // ToSingular singularizes a word. 491 func ToSingular(word string) string { 492 return replaceWord(word, irregularPlurals, irregularSingles, singularRules) 493 } 494 495 // IsSingular returns true if a word is singular 496 func IsSingular(word string) bool { 497 return checkWord(word, irregularPlurals, irregularSingles, singularRules) 498 } 499 500 // ToPlural makes a pluralized version of a word 501 func ToPlural(word string) string { 502 return replaceWord(word, irregularSingles, irregularPlurals, pluralRules) 503 }