github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/tok/stopwords_test.go

/*
 * Copyright 2018 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tok

import (
	"testing"

	"github.com/blevesearch/bleve/analysis"
	"github.com/stretchr/testify/require"
)

func TestFilterStopwords(t *testing.T) {
	tests := []struct {
		lang string
		in   analysis.TokenStream
		out  analysis.TokenStream
	}{
		// English: the stopwords "the" and "over" are dropped.
		{lang: "en",
			in: analysis.TokenStream{
				&analysis.Token{Term: []byte("the")},
				&analysis.Token{Term: []byte("quick")},
				&analysis.Token{Term: []byte("brown")},
				&analysis.Token{Term: []byte("foxes")},
				&analysis.Token{Term: []byte("jump")},
				&analysis.Token{Term: []byte("over")},
				&analysis.Token{Term: []byte("the")},
				&analysis.Token{Term: []byte("big")},
				&analysis.Token{Term: []byte("dogs")},
			},
			out: analysis.TokenStream{
				&analysis.Token{Term: []byte("quick")},
				&analysis.Token{Term: []byte("brown")},
				&analysis.Token{Term: []byte("foxes")},
				&analysis.Token{Term: []byte("jump")},
				&analysis.Token{Term: []byte("big")},
				&analysis.Token{Term: []byte("dogs")},
			},
		},
		// Spanish: the stopwords "a", "las", "y", "los", and "de" are dropped.
		{lang: "es",
			in: analysis.TokenStream{
				&analysis.Token{Term: []byte("deseándoles")},
				&analysis.Token{Term: []byte("muchas")},
				&analysis.Token{Term: []byte("alegrías")},
				&analysis.Token{Term: []byte("a")},
				&analysis.Token{Term: []byte("las")},
				&analysis.Token{Term: []byte("señoritas")},
				&analysis.Token{Term: []byte("y")},
				&analysis.Token{Term: []byte("los")},
				&analysis.Token{Term: []byte("señores")},
				&analysis.Token{Term: []byte("programadores")},
				&analysis.Token{Term: []byte("de")},
				&analysis.Token{Term: []byte("Dgraph")},
			},
			out: analysis.TokenStream{
				&analysis.Token{Term: []byte("deseándoles")},
				&analysis.Token{Term: []byte("muchas")},
				&analysis.Token{Term: []byte("alegrías")},
				&analysis.Token{Term: []byte("señoritas")},
				&analysis.Token{Term: []byte("señores")},
				&analysis.Token{Term: []byte("programadores")},
				&analysis.Token{Term: []byte("Dgraph")},
			},
		},
		// Unsupported language: the stream passes through unchanged.
		{lang: "x-klingon",
			in: analysis.TokenStream{
				&analysis.Token{Term: []byte("tlhIngan")},
				&analysis.Token{Term: []byte("maH!")},
			},
			out: analysis.TokenStream{
				&analysis.Token{Term: []byte("tlhIngan")},
				&analysis.Token{Term: []byte("maH!")},
			},
		},
		// Every token is a stopword: the result is an empty stream.
		{lang: "en",
			in: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("same"),
				},
			},
			out: analysis.TokenStream{},
		},
		// Empty input yields empty output.
		{lang: "en",
			in:  analysis.TokenStream{},
			out: analysis.TokenStream{},
		},
		// No language given: nothing is filtered.
		{lang: "",
			in: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
			out: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
	}

	for _, tc := range tests {
		out := filterStopwords(tc.lang, tc.in)
		require.Equal(t, tc.out, out)
	}
}
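
The helper under test, filterStopwords, lives elsewhere in the tok package and is not shown here. The following is a minimal sketch of the behavior these cases exercise, using hand-rolled stopword sets as a hypothetical stand-in; it is not the package's actual implementation, which the bleve import suggests is built on bleve's per-language stopword lists.

package tok

import "github.com/blevesearch/bleve/analysis"

// stopwordsByLang is a hypothetical stand-in for real per-language stopword
// lists; only the terms needed to illustrate the test cases are included.
var stopwordsByLang = map[string]map[string]struct{}{
	"en": {"the": {}, "over": {}, "same": {}},
	"es": {"a": {}, "las": {}, "y": {}, "los": {}, "de": {}},
}

// filterStopwords drops tokens whose term is a stopword for lang. Languages
// without a stopword set (e.g. "x-klingon" or "") are returned unchanged.
func filterStopwords(lang string, input analysis.TokenStream) analysis.TokenStream {
	stops, ok := stopwordsByLang[lang]
	if !ok {
		return input
	}
	out := analysis.TokenStream{}
	for _, tk := range input {
		if _, isStop := stops[string(tk.Term)]; !isStop {
			out = append(out, tk)
		}
	}
	return out
}

Note that unknown or empty language tags fall back to returning the input untouched, which is exactly what the x-klingon and empty-language cases assert.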