github.com/altipla-consulting/ravendb-go-client@v0.1.3/inflect.go (about)

     1  package ravendb
     2  
     3  // this is inflect.go from https://github.com/kjk/inflect
     4  // included directly to minimize dependencies
     5  // under MIT license: https://github.com/kjk/inflect/blob/master/LICENSE
     6  
     7  import (
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  	"unicode"
    12  	"unicode/utf8"
    13  )
    14  
// irregularRules lists {singular, plural} word pairs whose inflection is not
// produced by the regex rule tables. addIrregularRules lower-cases both forms
// and indexes every pair in both directions. When several singulars share a
// plural (e.g. "he" and "she" -> "they"), the later pair overwrites the
// earlier one in the plural -> singular index.
var irregularRules = [][]string{
	// Pronouns.
	{"I", "we"},
	{"me", "us"},
	{"he", "they"},
	{"she", "they"},
	{"them", "them"},
	{"myself", "ourselves"},
	{"yourself", "yourselves"},
	{"itself", "themselves"},
	{"herself", "themselves"},
	{"himself", "themselves"},
	{"themself", "themselves"},
	{"is", "are"},
	{"was", "were"},
	{"has", "have"},
	{"this", "these"},
	{"that", "those"},
	// Words ending with a consonant and `o`.
	{"echo", "echoes"},
	{"dingo", "dingoes"},
	{"volcano", "volcanoes"},
	{"tornado", "tornadoes"},
	{"torpedo", "torpedoes"},
	// Ends with `us`.
	{"genus", "genera"},
	{"viscus", "viscera"},
	// Ends with `ma`.
	{"stigma", "stigmata"},
	{"stoma", "stomata"},
	{"dogma", "dogmata"},
	{"lemma", "lemmata"},
	{"schema", "schemata"},
	{"anathema", "anathemata"},
	// Other irregular rules.
	{"ox", "oxen"},
	{"axe", "axes"},
	{"die", "dice"},
	{"yes", "yeses"},
	{"foot", "feet"},
	{"eave", "eaves"},
	{"goose", "geese"},
	{"tooth", "teeth"},
	{"quiz", "quizzes"},
	{"human", "humans"},
	{"proof", "proofs"},
	{"carve", "carves"},
	{"valve", "valves"},
	{"looey", "looies"},
	{"thief", "thieves"},
	{"groove", "grooves"},
	{"pickaxe", "pickaxes"},
	{"whiskey", "whiskies"},
}
    69  
// pluralizationRules pairs a pattern with its replacement template.
//
// A pattern is either a JavaScript-style regex literal (`/.../i`) or a plain
// word; sanitizeRule converts both to Go regexps. Replacement templates use
// `$0`, `$1` and `$2` group references, which jsReplaceSyntaxToGo rewrites to
// Go's `${n}` form.
//
// Order matters: addPluralizationRules registers the entries top to bottom and
// sanitizeWord tries registered rules from the last one backwards, so entries
// further down this table take precedence over the ones above them.
var pluralizationRules = [][]string{
	{`/s?$/i`, `s`},
	{`/[^\u0000-\u007F]$/i`, `$0`},
	{`/([^aeiou]ese)$/i`, `$1`},
	{`/(ax|test)is$/i`, `$1es`},
	{`/(alias|[^aou]us|t[lm]as|gas|ris)$/i`, `$1es`},
	{`/(e[mn]u)s?$/i`, `$1s`},
	{`/([^l]ias|[aeiou]las|[ejzr]as|[iu]am)$/i`, `$1`},
	{`/(alumn|syllab|octop|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$/i`, `$1i`},
	{`/(alumn|alg|vertebr)(?:a|ae)$/i`, `$1ae`},
	{`/(seraph|cherub)(?:im)?$/i`, `$1im`},
	{`/(her|at|gr)o$/i`, `$1oes`},
	{`/(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|automat|quor)(?:a|um)$/i`, `$1a`},
	{`/(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)(?:a|on)$/i`, `$1a`},
	{`/sis$/i`, `ses`},
	{`/(?:(kni|wi|li)fe|(ar|l|ea|eo|oa|hoo)f)$/i`, `$1$2ves`},
	{`/([^aeiouy]|qu)y$/i`, `$1ies`},
	{`/([^ch][ieo][ln])ey$/i`, `$1ies`},
	{`/(x|ch|ss|sh|zz)$/i`, `$1es`},
	{`/(matr|cod|mur|sil|vert|ind|append)(?:ix|ex)$/i`, `$1ices`},
	{`/\b((?:tit)?m|l)(?:ice|ouse)$/i`, `$1ice`},
	{`/(pe)(?:rson|ople)$/i`, `$1ople`},
	{`/(child)(?:ren)?$/i`, `$1ren`},
	{`/eaux$/i`, `$0`},
	{`/m[ae]n$/i`, `men`},
	{`thou`, `you`},
}
    97  
// singularizationRules pairs a pattern with its replacement template, using
// the same JavaScript-style notation as pluralizationRules: `/.../i` regex
// literals and `$1`-style group references.
//
// Order matters: addSingularizationRules registers the entries top to bottom
// and sanitizeWord tries registered rules from the last one backwards, so the
// generic `/s$/i` rule at the top is only reached when nothing below matches.
var singularizationRules = [][]string{
	{`/s$/i`, ``},
	{`/(ss)$/i`, `$1`},
	{`/(wi|kni|(?:after|half|high|low|mid|non|night|[^\w]|^)li)ves$/i`, `$1fe`},
	{`/(ar|(?:wo|[ae])l|[eo][ao])ves$/i`, `$1f`},
	{`/ies$/i`, `y`},
	{`/\b([pl]|zomb|(?:neck|cross)?t|coll|faer|food|gen|goon|group|lass|talk|goal|cut)ies$/i`, `$1ie`},
	{`/\b(mon|smil)ies$/i`, `$1ey`},
	{`/\b((?:tit)?m|l)ice$/i`, `$1ouse`},
	{`/(seraph|cherub)im$/i`, `$1`},
	{`/(x|ch|ss|sh|zz|tto|go|cho|alias|[^aou]us|t[lm]as|gas|(?:her|at|gr)o|ris)(?:es)?$/i`, `$1`},
	{`/(analy|ba|diagno|parenthe|progno|synop|the|empha|cri)(?:sis|ses)$/i`, `$1sis`},
	{`/(movie|twelve|abuse|e[mn]u)s$/i`, `$1`},
	{`/(test)(?:is|es)$/i`, `$1is`},
	{`/(alumn|syllab|octop|vir|radi|nucle|fung|cact|stimul|termin|bacill|foc|uter|loc|strat)(?:us|i)$/i`, `$1us`},
	{`/(agend|addend|millenni|dat|extrem|bacteri|desiderat|strat|candelabr|errat|ov|symposi|curricul|quor)a$/i`, `$1um`},
	{`/(apheli|hyperbat|periheli|asyndet|noumen|phenomen|criteri|organ|prolegomen|hedr|automat)a$/i`, `$1on`},
	{`/(alumn|alg|vertebr)ae$/i`, `$1a`},
	{`/(cod|mur|sil|vert|ind)ices$/i`, `$1ex`},
	{`/(matr|append)ices$/i`, `$1ix`},
	{`/(pe)(rson|ople)$/i`, `$1rson`},
	{`/(child)ren$/i`, `$1`},
	{`/(eau)x?$/i`, `$1`},
	{`/men$/i`, `man`},
}
   123  
// uncountableRules lists words that have the same singular and plural form.
//
// addUncountableRules treats the two kinds of entry differently: a plain word
// is lower-cased and stored in the uncountables map, while an entry written as
// a `/.../i` regex literal is registered as both a plural and a singular rule
// whose replacement is `$0`, i.e. the matched text itself.
var uncountableRules = []string{
	// singular words with no plurals.
	"adulthood",
	"advice",
	"agenda",
	"aid",
	"alcohol",
	"ammo",
	"anime",
	"athletics",
	"audio",
	"bison",
	"blood",
	"bream",
	"buffalo",
	"butter",
	"carp",
	"cash",
	"chassis",
	"chess",
	"clothing",
	"cod",
	"commerce",
	"cooperation",
	"corps",
	"debris",
	"diabetes",
	"digestion",
	"elk",
	"energy",
	"equipment",
	"excretion",
	"expertise",
	"flounder",
	"fun",
	"gallows",
	"garbage",
	"graffiti",
	"headquarters",
	"health",
	"herpes",
	"highjinks",
	"homework",
	"housework",
	"information",
	"jeans",
	"justice",
	"kudos",
	"labour",
	"literature",
	"machinery",
	"mackerel",
	"mail",
	"media",
	"mews",
	"moose",
	"music",
	"mud",
	"manga",
	"news",
	"pike",
	"plankton",
	"pliers",
	"police",
	"pollution",
	"premises",
	"rain",
	"research",
	"rice",
	"salmon",
	"scissors",
	"series",
	"sewage",
	"shambles",
	"shrimp",
	"species",
	"staff",
	"swine",
	"tennis",
	"traffic",
	"transportation",
	"trout",
	"tuna",
	"wealth",
	"welfare",
	"whiting",
	"wildebeest",
	"wildlife",
	"you",
	// Regexes.
	`/[^aeiou]ese$/i`, // "chinese", "japanese"
	`/deer$/i`,        // "deer", "reindeer"
	`/fish$/i`,        // "fish", "blowfish", "angelfish"
	`/measles$/i`,
	`/o[iu]s$/i`, // "carnivorous"
	`/pox$/i`,    // "chickenpox", "smallpox"
	`/sheep$/i`,
}
   223  
   224  type rxRule struct {
   225  	// TODO: for debugging, maybe remove when working
   226  	rxStrJs string
   227  	rxStrGo string
   228  
   229  	rx          *regexp.Regexp
   230  	replacement string
   231  }
   232  
   233  // Rule storage - pluralize and singularize need to be run sequentially,
   234  // while other rules can be optimized using an object for instant lookups.
   235  var pluralRules []rxRule
   236  var singularRules []rxRule
   237  var irregularPlurals = map[string]string{}
   238  var irregularSingles = map[string]string{}
   239  var uncountables = map[string]string{}
   240  
   241  func init() {
   242  	// order is important
   243  	addIrregularRules()
   244  	addPluralizationRules()
   245  	addSingularizationRules()
   246  	addUncountableRules()
   247  }
   248  
   249  // Add a pluralization rule to the collection.
   250  func addPluralRule(rule string, replacement string) {
   251  	rx, rxStrGo := sanitizeRule(rule)
   252  	r := rxRule{
   253  		rxStrJs:     rule,
   254  		rxStrGo:     rxStrGo,
   255  		rx:          rx,
   256  		replacement: jsReplaceSyntaxToGo(replacement),
   257  	}
   258  	pluralRules = append(pluralRules, r)
   259  }
   260  
   261  var (
   262  	unicodeSyntaxRx = regexp.MustCompile(`\\u([[:xdigit:]]{4})`)
   263  )
   264  
   265  // best-effort of converting javascript regex syntax to equivalent go syntax
   266  func jsRxSyntaxToGo(rx string) string {
   267  	s := rx
   268  	caseInsensitive := false
   269  	panicIf(s[0] != '/', "expected '%s' to start with '/'", rx)
   270  	s = s[1:]
   271  	n := len(s)
   272  	if s[n-1] == 'i' {
   273  		n--
   274  		caseInsensitive = true
   275  		s = s[:n]
   276  	}
   277  	panicIf(s[n-1] != '/', "expected '%s' to end with '/'", rx)
   278  	s = s[:n-1]
   279  	// \uNNNN syntax for unicode code points to \x{NNNN} syntax for hex character code
   280  	s = unicodeSyntaxRx.ReplaceAllString(s, "\\x{$1}")
   281  	if caseInsensitive {
   282  		s = "(?i)" + s
   283  	}
   284  	return s
   285  }
   286  
   287  func jsReplaceSyntaxToGo(s string) string {
   288  	s = strings.Replace(s, "$0", "${0}", -1)
   289  	s = strings.Replace(s, "$1", "${1}", -1)
   290  	s = strings.Replace(s, "$2", "${2}", -1)
   291  	return s
   292  }
   293  
   294  // Sanitize a pluralization rule to a usable regular expression.
   295  func sanitizeRule(rule string) (*regexp.Regexp, string) {
   296  	// in JavaScript, regexpes start with /
   297  	// others are just regular strings
   298  	var s string
   299  	if rule[0] != '/' {
   300  		// a plain string match is converted to regexp that:
   301  		// ^ ... $ : does exact match (matches at the beginning and end)
   302  		// (?i) : is case-insensitive
   303  		s = `(?i)^` + rule + `$`
   304  	} else {
   305  		s = jsRxSyntaxToGo(rule)
   306  	}
   307  	return regexp.MustCompile(s), s
   308  }
   309  
   310  // Add a singularization rule to the collection.
   311  func addSingularRule(rule, replacement string) {
   312  	rx, rxGo := sanitizeRule(rule)
   313  	r := rxRule{
   314  		rxStrJs:     rule,
   315  		rxStrGo:     rxGo,
   316  		rx:          rx,
   317  		replacement: jsReplaceSyntaxToGo(replacement),
   318  	}
   319  	singularRules = append(singularRules, r)
   320  }
   321  
   322  // copied from strings.ToUpper
   323  // returns true if s is uppercase
   324  func isUpper(s string) bool {
   325  	isASCII, hasLower := true, false
   326  	for i := 0; i < len(s); i++ {
   327  		c := s[i]
   328  		if c >= utf8.RuneSelf {
   329  			isASCII = false
   330  			break
   331  		}
   332  		hasLower = hasLower || (c >= 'a' && c <= 'z')
   333  	}
   334  	if isASCII {
   335  		return !hasLower
   336  	}
   337  	for r := range s {
   338  		if !unicode.IsUpper(rune(r)) {
   339  			return false
   340  		}
   341  	}
   342  	return true
   343  }
   344  
   345  // Pass in a word token to produce a function that can replicate the case on
   346  // another word.
   347  func restoreCase(word string, token string) string {
   348  	// Tokens are an exact match.
   349  	if word == token {
   350  		return token
   351  	}
   352  
   353  	// Upper cased words. E.g. "HELLO".
   354  	if isUpper(word) {
   355  		return strings.ToUpper(token)
   356  	}
   357  
   358  	// Title cased words. E.g. "Title".
   359  	prefix := word[:1]
   360  	if isUpper(prefix) {
   361  		return strings.ToUpper(token[:1]) + strings.ToLower(token[1:])
   362  	}
   363  
   364  	// Lower cased words. E.g. "test".
   365  	return strings.ToLower(token)
   366  }
   367  
   368  // Replace a word using a rule.
   369  func replace(word string, rule rxRule) string {
   370  	// TODO: not sure if this covers all possibilities
   371  	repl := rule.replacement
   372  	if isUpper(word) {
   373  		repl = strings.ToUpper(repl)
   374  	}
   375  	return rule.rx.ReplaceAllString(word, repl)
   376  }
   377  
   378  // Sanitize a word by passing in the word and sanitization rules.
   379  func sanitizeWord(token string, word string, rules []rxRule) string {
   380  	// Empty string or doesn't need fixing.
   381  	if len(token) == 0 {
   382  		return word
   383  	}
   384  	if _, ok := uncountables[token]; ok {
   385  		return word
   386  	}
   387  
   388  	// Iterate over the sanitization rules and use the first one to match.
   389  	// important that we iterate from the end
   390  	n := len(rules)
   391  	for i := n - 1; i >= 0; i-- {
   392  		rule := rules[i]
   393  		if rule.rx.MatchString(word) {
   394  			return replace(word, rule)
   395  		}
   396  	}
   397  	return word
   398  }
   399  
   400  // Replace a word with the updated word.
   401  func replaceWord(word string, replaceMap map[string]string, keepMap map[string]string, rules []rxRule) string {
   402  	// Get the correct token and case restoration functions.
   403  	token := strings.ToLower(word)
   404  
   405  	// Check against the keep object map.
   406  	if _, ok := keepMap[token]; ok {
   407  		return restoreCase(word, token)
   408  	}
   409  
   410  	// Check against the replacement map for a direct word replacement.
   411  	if s, ok := replaceMap[token]; ok {
   412  		return restoreCase(word, s)
   413  	}
   414  
   415  	// Run all the rules against the word.
   416  	return sanitizeWord(token, word, rules)
   417  }
   418  
   419  // Check if a word is part of the map.
   420  func checkWord(word string, replaceMap map[string]string, keepMap map[string]string, rules []rxRule) bool {
   421  	token := strings.ToLower(word)
   422  
   423  	if _, ok := keepMap[token]; ok {
   424  		return true
   425  	}
   426  
   427  	if _, ok := replaceMap[token]; ok {
   428  		return false
   429  	}
   430  
   431  	return sanitizeWord(token, token, rules) == token
   432  }
   433  
   434  // Add an irregular word definition.
   435  func addIrregularRules() {
   436  	for _, rule := range irregularRules {
   437  		single := strings.ToLower(rule[0])
   438  		plural := strings.ToLower(rule[1])
   439  
   440  		irregularSingles[single] = plural
   441  		irregularPlurals[plural] = single
   442  	}
   443  }
   444  
   445  func addSingularizationRules() {
   446  	for _, r := range singularizationRules {
   447  		addSingularRule(r[0], r[1])
   448  	}
   449  }
   450  
   451  func addUncountableRules() {
   452  	for _, word := range uncountableRules {
   453  		if word[0] != '/' {
   454  			word = strings.ToLower(word)
   455  			uncountables[word] = word
   456  			continue
   457  		}
   458  		// Set singular and plural references for the word.
   459  		addPluralRule(word, "$0")
   460  		addSingularRule(word, "$0")
   461  	}
   462  }
   463  
   464  func addPluralizationRules() {
   465  	for _, rule := range pluralizationRules {
   466  		addPluralRule(rule[0], rule[1])
   467  	}
   468  }
   469  
   470  // Pluralize or singularize a word based on the passed in count.
   471  func Pluralize(word string, count int, inclusive bool) string {
   472  	var res string
   473  	if count == 1 {
   474  		res = ToSingular(word)
   475  	} else {
   476  		res = ToPlural(word)
   477  	}
   478  
   479  	if inclusive {
   480  		return strconv.Itoa(count) + " " + res
   481  	}
   482  	return res
   483  }
   484  
   485  // IsPlural retruns true if word is plural
   486  func IsPlural(word string) bool {
   487  	return checkWord(word, irregularSingles, irregularPlurals, pluralRules)
   488  }
   489  
   490  // ToSingular singularizes a word.
   491  func ToSingular(word string) string {
   492  	return replaceWord(word, irregularPlurals, irregularSingles, singularRules)
   493  }
   494  
   495  // IsSingular returns true if a word is singular
   496  func IsSingular(word string) bool {
   497  	return checkWord(word, irregularPlurals, irregularSingles, singularRules)
   498  }
   499  
   500  // ToPlural makes a pluralized version of a word
   501  func ToPlural(word string) string {
   502  	return replaceWord(word, irregularSingles, irregularPlurals, pluralRules)
   503  }