github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/tok/stemmers_test.go (about) 1 /* 2 * Copyright 2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package tok 18 19 import ( 20 "testing" 21 22 "github.com/blevesearch/bleve/analysis" 23 "github.com/stretchr/testify/require" 24 ) 25 26 func TestFilterStemmers(t *testing.T) { 27 tests := []struct { 28 lang string 29 in analysis.TokenStream 30 out analysis.TokenStream 31 }{ 32 {lang: "en", 33 in: analysis.TokenStream{ 34 &analysis.Token{Term: []byte("the")}, 35 &analysis.Token{Term: []byte("quick")}, 36 &analysis.Token{Term: []byte("brown")}, 37 &analysis.Token{Term: []byte("foxes")}, 38 &analysis.Token{Term: []byte("jump")}, 39 &analysis.Token{Term: []byte("over")}, 40 &analysis.Token{Term: []byte("the")}, 41 &analysis.Token{Term: []byte("big")}, 42 &analysis.Token{Term: []byte("dogs")}, 43 }, 44 out: analysis.TokenStream{ 45 &analysis.Token{Term: []byte("the")}, 46 &analysis.Token{Term: []byte("quick")}, 47 &analysis.Token{Term: []byte("brown")}, 48 &analysis.Token{Term: []byte("fox")}, 49 &analysis.Token{Term: []byte("jump")}, 50 &analysis.Token{Term: []byte("over")}, 51 &analysis.Token{Term: []byte("the")}, 52 &analysis.Token{Term: []byte("big")}, 53 &analysis.Token{Term: []byte("dog")}, 54 }, 55 }, 56 {lang: "es", 57 in: analysis.TokenStream{ 58 &analysis.Token{Term: []byte("deseándoles")}, 59 &analysis.Token{Term: []byte("muchas")}, 60 &analysis.Token{Term: []byte("alegrías")}, 61 &analysis.Token{Term: []byte("a")}, 62 &analysis.Token{Term: []byte("las")}, 63 &analysis.Token{Term: []byte("señoritas")}, 64 &analysis.Token{Term: []byte("y")}, 65 &analysis.Token{Term: []byte("los")}, 66 &analysis.Token{Term: []byte("señores")}, 67 &analysis.Token{Term: []byte("programadores")}, 68 &analysis.Token{Term: []byte("de")}, 69 &analysis.Token{Term: []byte("Dgraph")}, 70 }, 71 out: analysis.TokenStream{ 72 &analysis.Token{Term: []byte("deseandol")}, 73 &analysis.Token{Term: []byte("much")}, 74 &analysis.Token{Term: []byte("alegri")}, 75 &analysis.Token{Term: []byte("a")}, 76 &analysis.Token{Term: []byte("las")}, 77 &analysis.Token{Term: []byte("señorit")}, 78 &analysis.Token{Term: []byte("y")}, 79 &analysis.Token{Term: []byte("los")}, 80 &analysis.Token{Term: []byte("señor")}, 81 &analysis.Token{Term: []byte("programador")}, 82 &analysis.Token{Term: []byte("de")}, 83 &analysis.Token{Term: []byte("Dgraph")}, 84 }, 85 }, 86 {lang: "x-klingon", 87 in: analysis.TokenStream{ 88 &analysis.Token{Term: []byte("tlhIngan")}, 89 &analysis.Token{Term: []byte("maH!")}, 90 }, 91 out: analysis.TokenStream{ 92 &analysis.Token{Term: []byte("tlhIngan")}, 93 &analysis.Token{Term: []byte("maH!")}, 94 }, 95 }, 96 {lang: "en", 97 in: analysis.TokenStream{}, 98 out: analysis.TokenStream{}, 99 }, 100 {lang: "", 101 in: analysis.TokenStream{ 102 &analysis.Token{ 103 Term: []byte(""), 104 }, 105 }, 106 out: analysis.TokenStream{ 107 &analysis.Token{ 108 Term: []byte(""), 109 }, 110 }, 111 }, 112 } 113 114 for _, tc := range tests { 115 out := filterStemmers(tc.lang, tc.in) 116 require.Equal(t, tc.out, out) 117 } 118 }