github.com/fluhus/gostuff@v0.4.1-0.20240331134726-be71864f2b5d/nlp/stopwords.go (about)

     1  package nlp
     2  
     3  // StopWords is a map of stop words, for token filtering. Modifying this map
     4  // will affect the Tokenize function.
     5  //
     6  // Taken from: http://www.ranks.nl/stopwords
     7  var StopWords = map[string]bool{
     8  	"a":             true,
     9  	"a's":           true,
    10  	"able":          true,
    11  	"about":         true,
    12  	"above":         true,
    13  	"according":     true,
    14  	"accordingly":   true,
    15  	"across":        true,
    16  	"actually":      true,
    17  	"after":         true,
    18  	"afterwards":    true,
    19  	"again":         true,
    20  	"against":       true,
    21  	"ain't":         true,
    22  	"all":           true,
    23  	"allow":         true,
    24  	"allows":        true,
    25  	"almost":        true,
    26  	"alone":         true,
    27  	"along":         true,
    28  	"already":       true,
    29  	"also":          true,
    30  	"although":      true,
    31  	"always":        true,
    32  	"am":            true,
    33  	"among":         true,
    34  	"amongst":       true,
    35  	"an":            true,
    36  	"and":           true,
    37  	"another":       true,
    38  	"any":           true,
    39  	"anybody":       true,
    40  	"anyhow":        true,
    41  	"anyone":        true,
    42  	"anything":      true,
    43  	"anyway":        true,
    44  	"anyways":       true,
    45  	"anywhere":      true,
    46  	"apart":         true,
    47  	"appear":        true,
    48  	"appreciate":    true,
    49  	"appropriate":   true,
    50  	"are":           true,
    51  	"aren't":        true,
    52  	"around":        true,
    53  	"as":            true,
    54  	"aside":         true,
    55  	"ask":           true,
    56  	"asking":        true,
    57  	"associated":    true,
    58  	"at":            true,
    59  	"available":     true,
    60  	"away":          true,
    61  	"awfully":       true,
    62  	"b":             true,
    63  	"be":            true,
    64  	"became":        true,
    65  	"because":       true,
    66  	"become":        true,
    67  	"becomes":       true,
    68  	"becoming":      true,
    69  	"been":          true,
    70  	"before":        true,
    71  	"beforehand":    true,
    72  	"behind":        true,
    73  	"being":         true,
    74  	"believe":       true,
    75  	"below":         true,
    76  	"beside":        true,
    77  	"besides":       true,
    78  	"best":          true,
    79  	"better":        true,
    80  	"between":       true,
    81  	"beyond":        true,
    82  	"both":          true,
    83  	"brief":         true,
    84  	"but":           true,
    85  	"by":            true,
    86  	"c":             true,
    87  	"c'mon":         true,
    88  	"c's":           true,
    89  	"came":          true,
    90  	"can":           true,
    91  	"can't":         true,
    92  	"cannot":        true,
    93  	"cant":          true,
    94  	"cause":         true,
    95  	"causes":        true,
    96  	"certain":       true,
    97  	"certainly":     true,
    98  	"changes":       true,
    99  	"clearly":       true,
   100  	"co":            true,
   101  	"com":           true,
   102  	"come":          true,
   103  	"comes":         true,
   104  	"concerning":    true,
   105  	"consequently":  true,
   106  	"consider":      true,
   107  	"considering":   true,
   108  	"contain":       true,
   109  	"containing":    true,
   110  	"contains":      true,
   111  	"corresponding": true,
   112  	"could":         true,
   113  	"couldn't":      true,
   114  	"course":        true,
   115  	"currently":     true,
   116  	"d":             true,
   117  	"definitely":    true,
   118  	"described":     true,
   119  	"despite":       true,
   120  	"did":           true,
   121  	"didn't":        true,
   122  	"different":     true,
   123  	"do":            true,
   124  	"does":          true,
   125  	"doesn't":       true,
   126  	"doing":         true,
   127  	"don't":         true,
   128  	"done":          true,
   129  	"down":          true,
   130  	"downwards":     true,
   131  	"during":        true,
   132  	"e":             true,
   133  	"each":          true,
   134  	"edu":           true,
   135  	"eg":            true,
   136  	"eight":         true,
   137  	"either":        true,
   138  	"else":          true,
   139  	"elsewhere":     true,
   140  	"enough":        true,
   141  	"entirely":      true,
   142  	"especially":    true,
   143  	"et":            true,
   144  	"etc":           true,
   145  	"even":          true,
   146  	"ever":          true,
   147  	"every":         true,
   148  	"everybody":     true,
   149  	"everyone":      true,
   150  	"everything":    true,
   151  	"everywhere":    true,
   152  	"ex":            true,
   153  	"exactly":       true,
   154  	"example":       true,
   155  	"except":        true,
   156  	"f":             true,
   157  	"far":           true,
   158  	"few":           true,
   159  	"fifth":         true,
   160  	"first":         true,
   161  	"five":          true,
   162  	"followed":      true,
   163  	"following":     true,
   164  	"follows":       true,
   165  	"for":           true,
   166  	"former":        true,
   167  	"formerly":      true,
   168  	"forth":         true,
   169  	"four":          true,
   170  	"from":          true,
   171  	"further":       true,
   172  	"furthermore":   true,
   173  	"g":             true,
   174  	"get":           true,
   175  	"gets":          true,
   176  	"getting":       true,
   177  	"given":         true,
   178  	"gives":         true,
   179  	"go":            true,
   180  	"goes":          true,
   181  	"going":         true,
   182  	"gone":          true,
   183  	"got":           true,
   184  	"gotten":        true,
   185  	"greetings":     true,
   186  	"h":             true,
   187  	"had":           true,
   188  	"hadn't":        true,
   189  	"happens":       true,
   190  	"hardly":        true,
   191  	"has":           true,
   192  	"hasn't":        true,
   193  	"have":          true,
   194  	"haven't":       true,
   195  	"having":        true,
   196  	"he":            true,
   197  	"he's":          true,
   198  	"hello":         true,
   199  	"help":          true,
   200  	"hence":         true,
   201  	"her":           true,
   202  	"here":          true,
   203  	"here's":        true,
   204  	"hereafter":     true,
   205  	"hereby":        true,
   206  	"herein":        true,
   207  	"hereupon":      true,
   208  	"hers":          true,
   209  	"herself":       true,
   210  	"hi":            true,
   211  	"him":           true,
   212  	"himself":       true,
   213  	"his":           true,
   214  	"hither":        true,
   215  	"hopefully":     true,
   216  	"how":           true,
   217  	"howbeit":       true,
   218  	"however":       true,
   219  	"i":             true,
   220  	"i'd":           true,
   221  	"i'll":          true,
   222  	"i'm":           true,
   223  	"i've":          true,
   224  	"ie":            true,
   225  	"if":            true,
   226  	"ignored":       true,
   227  	"immediate":     true,
   228  	"in":            true,
   229  	"inasmuch":      true,
   230  	"inc":           true,
   231  	"indeed":        true,
   232  	"indicate":      true,
   233  	"indicated":     true,
   234  	"indicates":     true,
   235  	"inner":         true,
   236  	"insofar":       true,
   237  	"instead":       true,
   238  	"into":          true,
   239  	"inward":        true,
   240  	"is":            true,
   241  	"isn't":         true,
   242  	"it":            true,
   243  	"it'd":          true,
   244  	"it'll":         true,
   245  	"it's":          true,
   246  	"its":           true,
   247  	"itself":        true,
   248  	"j":             true,
   249  	"just":          true,
   250  	"k":             true,
   251  	"keep":          true,
   252  	"keeps":         true,
   253  	"kept":          true,
   254  	"know":          true,
   255  	"known":         true,
   256  	"knows":         true,
   257  	"l":             true,
   258  	"last":          true,
   259  	"lately":        true,
   260  	"later":         true,
   261  	"latter":        true,
   262  	"latterly":      true,
   263  	"least":         true,
   264  	"less":          true,
   265  	"lest":          true,
   266  	"let":           true,
   267  	"let's":         true,
   268  	"like":          true,
   269  	"liked":         true,
   270  	"likely":        true,
   271  	"little":        true,
   272  	"look":          true,
   273  	"looking":       true,
   274  	"looks":         true,
   275  	"ltd":           true,
   276  	"m":             true,
   277  	"mainly":        true,
   278  	"many":          true,
   279  	"may":           true,
   280  	"maybe":         true,
   281  	"me":            true,
   282  	"mean":          true,
   283  	"meanwhile":     true,
   284  	"merely":        true,
   285  	"might":         true,
   286  	"more":          true,
   287  	"moreover":      true,
   288  	"most":          true,
   289  	"mostly":        true,
   290  	"much":          true,
   291  	"must":          true,
   292  	"my":            true,
   293  	"myself":        true,
   294  	"n":             true,
   295  	"name":          true,
   296  	"namely":        true,
   297  	"nd":            true,
   298  	"near":          true,
   299  	"nearly":        true,
   300  	"necessary":     true,
   301  	"need":          true,
   302  	"needs":         true,
   303  	"neither":       true,
   304  	"never":         true,
   305  	"nevertheless":  true,
   306  	"new":           true,
   307  	"next":          true,
   308  	"nine":          true,
   309  	"no":            true,
   310  	"nobody":        true,
   311  	"non":           true,
   312  	"none":          true,
   313  	"noone":         true,
   314  	"nor":           true,
   315  	"normally":      true,
   316  	"not":           true,
   317  	"nothing":       true,
   318  	"novel":         true,
   319  	"now":           true,
   320  	"nowhere":       true,
   321  	"o":             true,
   322  	"obviously":     true,
   323  	"of":            true,
   324  	"off":           true,
   325  	"often":         true,
   326  	"oh":            true,
   327  	"ok":            true,
   328  	"okay":          true,
   329  	"old":           true,
   330  	"on":            true,
   331  	"once":          true,
   332  	"one":           true,
   333  	"ones":          true,
   334  	"only":          true,
   335  	"onto":          true,
   336  	"or":            true,
   337  	"other":         true,
   338  	"others":        true,
   339  	"otherwise":     true,
   340  	"ought":         true,
   341  	"our":           true,
   342  	"ours":          true,
   343  	"ourselves":     true,
   344  	"out":           true,
   345  	"outside":       true,
   346  	"over":          true,
   347  	"overall":       true,
   348  	"own":           true,
   349  	"p":             true,
   350  	"particular":    true,
   351  	"particularly":  true,
   352  	"per":           true,
   353  	"perhaps":       true,
   354  	"placed":        true,
   355  	"please":        true,
   356  	"plus":          true,
   357  	"possible":      true,
   358  	"presumably":    true,
   359  	"probably":      true,
   360  	"provides":      true,
   361  	"q":             true,
   362  	"que":           true,
   363  	"quite":         true,
   364  	"qv":            true,
   365  	"r":             true,
   366  	"rather":        true,
   367  	"rd":            true,
   368  	"re":            true,
   369  	"really":        true,
   370  	"reasonably":    true,
   371  	"regarding":     true,
   372  	"regardless":    true,
   373  	"regards":       true,
   374  	"relatively":    true,
   375  	"respectively":  true,
   376  	"right":         true,
   377  	"s":             true,
   378  	"said":          true,
   379  	"same":          true,
   380  	"saw":           true,
   381  	"say":           true,
   382  	"saying":        true,
   383  	"says":          true,
   384  	"second":        true,
   385  	"secondly":      true,
   386  	"see":           true,
   387  	"seeing":        true,
   388  	"seem":          true,
   389  	"seemed":        true,
   390  	"seeming":       true,
   391  	"seems":         true,
   392  	"seen":          true,
   393  	"self":          true,
   394  	"selves":        true,
   395  	"sensible":      true,
   396  	"sent":          true,
   397  	"serious":       true,
   398  	"seriously":     true,
   399  	"seven":         true,
   400  	"several":       true,
   401  	"shall":         true,
   402  	"she":           true,
   403  	"should":        true,
   404  	"shouldn't":     true,
   405  	"since":         true,
   406  	"six":           true,
   407  	"so":            true,
   408  	"some":          true,
   409  	"somebody":      true,
   410  	"somehow":       true,
   411  	"someone":       true,
   412  	"something":     true,
   413  	"sometime":      true,
   414  	"sometimes":     true,
   415  	"somewhat":      true,
   416  	"somewhere":     true,
   417  	"soon":          true,
   418  	"sorry":         true,
   419  	"specified":     true,
   420  	"specify":       true,
   421  	"specifying":    true,
   422  	"still":         true,
   423  	"sub":           true,
   424  	"such":          true,
   425  	"sup":           true,
   426  	"sure":          true,
   427  	"t":             true,
   428  	"t's":           true,
   429  	"take":          true,
   430  	"taken":         true,
   431  	"tell":          true,
   432  	"tends":         true,
   433  	"th":            true,
   434  	"than":          true,
   435  	"thank":         true,
   436  	"thanks":        true,
   437  	"thanx":         true,
   438  	"that":          true,
   439  	"that's":        true,
   440  	"thats":         true,
   441  	"the":           true,
   442  	"their":         true,
   443  	"theirs":        true,
   444  	"them":          true,
   445  	"themselves":    true,
   446  	"then":          true,
   447  	"thence":        true,
   448  	"there":         true,
   449  	"there's":       true,
   450  	"thereafter":    true,
   451  	"thereby":       true,
   452  	"therefore":     true,
   453  	"therein":       true,
   454  	"theres":        true,
   455  	"thereupon":     true,
   456  	"these":         true,
   457  	"they":          true,
   458  	"they'd":        true,
   459  	"they'll":       true,
   460  	"they're":       true,
   461  	"they've":       true,
   462  	"think":         true,
   463  	"third":         true,
   464  	"this":          true,
   465  	"thorough":      true,
   466  	"thoroughly":    true,
   467  	"those":         true,
   468  	"though":        true,
   469  	"three":         true,
   470  	"through":       true,
   471  	"throughout":    true,
   472  	"thru":          true,
   473  	"thus":          true,
   474  	"to":            true,
   475  	"together":      true,
   476  	"too":           true,
   477  	"took":          true,
   478  	"toward":        true,
   479  	"towards":       true,
   480  	"tried":         true,
   481  	"tries":         true,
   482  	"truly":         true,
   483  	"try":           true,
   484  	"trying":        true,
   485  	"twice":         true,
   486  	"two":           true,
   487  	"u":             true,
   488  	"un":            true,
   489  	"under":         true,
   490  	"unfortunately": true,
   491  	"unless":        true,
   492  	"unlikely":      true,
   493  	"until":         true,
   494  	"unto":          true,
   495  	"up":            true,
   496  	"upon":          true,
   497  	"us":            true,
   498  	"use":           true,
   499  	"used":          true,
   500  	"useful":        true,
   501  	"uses":          true,
   502  	"using":         true,
   503  	"usually":       true,
   504  	"v":             true,
   505  	"value":         true,
   506  	"various":       true,
   507  	"very":          true,
   508  	"via":           true,
   509  	"viz":           true,
   510  	"vs":            true,
   511  	"w":             true,
   512  	"want":          true,
   513  	"wants":         true,
   514  	"was":           true,
   515  	"wasn't":        true,
   516  	"way":           true,
   517  	"we":            true,
   518  	"we'd":          true,
   519  	"we'll":         true,
   520  	"we're":         true,
   521  	"we've":         true,
   522  	"welcome":       true,
   523  	"well":          true,
   524  	"went":          true,
   525  	"were":          true,
   526  	"weren't":       true,
   527  	"what":          true,
   528  	"what's":        true,
   529  	"whatever":      true,
   530  	"when":          true,
   531  	"whence":        true,
   532  	"whenever":      true,
   533  	"where":         true,
   534  	"where's":       true,
   535  	"whereafter":    true,
   536  	"whereas":       true,
   537  	"whereby":       true,
   538  	"wherein":       true,
   539  	"whereupon":     true,
   540  	"wherever":      true,
   541  	"whether":       true,
   542  	"which":         true,
   543  	"while":         true,
   544  	"whither":       true,
   545  	"who":           true,
   546  	"who's":         true,
   547  	"whoever":       true,
   548  	"whole":         true,
   549  	"whom":          true,
   550  	"whose":         true,
   551  	"why":           true,
   552  	"will":          true,
   553  	"willing":       true,
   554  	"wish":          true,
   555  	"with":          true,
   556  	"within":        true,
   557  	"without":       true,
   558  	"won't":         true,
   559  	"wonder":        true,
   560  	"would":         true,
   561  	"wouldn't":      true,
   562  	"x":             true,
   563  	"y":             true,
   564  	"yes":           true,
   565  	"yet":           true,
   566  	"you":           true,
   567  	"you'd":         true,
   568  	"you'll":        true,
   569  	"you're":        true,
   570  	"you've":        true,
   571  	"your":          true,
   572  	"yours":         true,
   573  	"yourself":      true,
   574  	"yourselves":    true,
   575  	"z":             true,
   576  	"zero":          true,
   577  }