// StopWords is the set of stop words used for token filtering. Every listed
// word maps to true. The map is shared and mutable: changing its contents
// will affect the Tokenize function.
//
// Taken from: http://www.ranks.nl/stopwords
var StopWords = func() map[string]bool {
	// One group of lines per initial letter.
	words := [...]string{
		"a", "a's", "able", "about", "above", "according", "accordingly",
		"across", "actually", "after", "afterwards", "again", "against",
		"ain't", "all", "allow", "allows", "almost", "alone", "along",
		"already", "also", "although", "always", "am", "among", "amongst",
		"an", "and", "another", "any", "anybody", "anyhow", "anyone",
		"anything", "anyway", "anyways", "anywhere", "apart", "appear",
		"appreciate", "appropriate", "are", "aren't", "around", "as", "aside",
		"ask", "asking", "associated", "at", "available", "away", "awfully",

		"b", "be", "became", "because", "become", "becomes", "becoming",
		"been", "before", "beforehand", "behind", "being", "believe", "below",
		"beside", "besides", "best", "better", "between", "beyond", "both",
		"brief", "but", "by",

		"c", "c'mon", "c's", "came", "can", "can't", "cannot", "cant",
		"cause", "causes", "certain", "certainly", "changes", "clearly", "co",
		"com", "come", "comes", "concerning", "consequently", "consider",
		"considering", "contain", "containing", "contains", "corresponding",
		"could", "couldn't", "course", "currently",

		"d", "definitely", "described", "despite", "did", "didn't",
		"different", "do", "does", "doesn't", "doing", "don't", "done", "down",
		"downwards", "during",

		"e", "each", "edu", "eg", "eight", "either", "else", "elsewhere",
		"enough", "entirely", "especially", "et", "etc", "even", "ever",
		"every", "everybody", "everyone", "everything", "everywhere", "ex",
		"exactly", "example", "except",

		"f", "far", "few", "fifth", "first", "five", "followed", "following",
		"follows", "for", "former", "formerly", "forth", "four", "from",
		"further", "furthermore",

		"g", "get", "gets", "getting", "given", "gives", "go", "goes", "going",
		"gone", "got", "gotten", "greetings",

		"h", "had", "hadn't", "happens", "hardly", "has", "hasn't", "have",
		"haven't", "having", "he", "he's", "hello", "help", "hence", "her",
		"here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers",
		"herself", "hi", "him", "himself", "his", "hither", "hopefully", "how",
		"howbeit", "however",

		"i", "i'd", "i'll", "i'm", "i've", "ie", "if", "ignored", "immediate",
		"in", "inasmuch", "inc", "indeed", "indicate", "indicated",
		"indicates", "inner", "insofar", "instead", "into", "inward", "is",
		"isn't", "it", "it'd", "it'll", "it's", "its", "itself",

		"j", "just",

		"k", "keep", "keeps", "kept", "know", "known", "knows",

		"l", "last", "lately", "later", "latter", "latterly", "least", "less",
		"lest", "let", "let's", "like", "liked", "likely", "little", "look",
		"looking", "looks", "ltd",

		"m", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile",
		"merely", "might", "more", "moreover", "most", "mostly", "much",
		"must", "my", "myself",

		"n", "name", "namely", "nd", "near", "nearly", "necessary", "need",
		"needs", "neither", "never", "nevertheless", "new", "next", "nine",
		"no", "nobody", "non", "none", "noone", "nor", "normally", "not",
		"nothing", "novel", "now", "nowhere",

		"o", "obviously", "of", "off", "often", "oh", "ok", "okay", "old",
		"on", "once", "one", "ones", "only", "onto", "or", "other", "others",
		"otherwise", "ought", "our", "ours", "ourselves", "out", "outside",
		"over", "overall", "own",

		"p", "particular", "particularly", "per", "perhaps", "placed",
		"please", "plus", "possible", "presumably", "probably", "provides",

		"q", "que", "quite", "qv",

		"r", "rather", "rd", "re", "really", "reasonably", "regarding",
		"regardless", "regards", "relatively", "respectively", "right",

		"s", "said", "same", "saw", "say", "saying", "says", "second",
		"secondly", "see", "seeing", "seem", "seemed", "seeming", "seems",
		"seen", "self", "selves", "sensible", "sent", "serious", "seriously",
		"seven", "several", "shall", "she", "should", "shouldn't", "since",
		"six", "so", "some", "somebody", "somehow", "someone", "something",
		"sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry",
		"specified", "specify", "specifying", "still", "sub", "such", "sup",
		"sure",

		"t", "t's", "take", "taken", "tell", "tends", "th", "than", "thank",
		"thanks", "thanx", "that", "that's", "thats", "the", "their", "theirs",
		"them", "themselves", "then", "thence", "there", "there's",
		"thereafter", "thereby", "therefore", "therein", "theres", "thereupon",
		"these", "they", "they'd", "they'll", "they're", "they've", "think",
		"third", "this", "thorough", "thoroughly", "those", "though", "three",
		"through", "throughout", "thru", "thus", "to", "together", "too",
		"took", "toward", "towards", "tried", "tries", "truly", "try",
		"trying", "twice", "two",

		"u", "un", "under", "unfortunately", "unless", "unlikely", "until",
		"unto", "up", "upon", "us", "use", "used", "useful", "uses", "using",
		"usually",

		"v", "value", "various", "very", "via", "viz", "vs",

		"w", "want", "wants", "was", "wasn't", "way", "we", "we'd", "we'll",
		"we're", "we've", "welcome", "well", "went", "were", "weren't", "what",
		"what's", "whatever", "when", "whence", "whenever", "where", "where's",
		"whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever",
		"whether", "which", "while", "whither", "who", "who's", "whoever",
		"whole", "whom", "whose", "why", "will", "willing", "wish", "with",
		"within", "without", "won't", "wonder", "would", "wouldn't",

		"x",

		"y", "yes", "yet", "you", "you'd", "you'll", "you're", "you've",
		"your", "yours", "yourself", "yourselves",

		"z", "zero",
	}

	// Turn the flat word list into a membership map.
	set := make(map[string]bool, len(words))
	for _, word := range words {
		set[word] = true
	}
	return set
}()