github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/tok/stemmers_test.go (about)

     1  /*
     2   * Copyright 2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package tok
    18  
    19  import (
    20  	"testing"
    21  
    22  	"github.com/blevesearch/bleve/analysis"
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func TestFilterStemmers(t *testing.T) {
    27  	tests := []struct {
    28  		lang string
    29  		in   analysis.TokenStream
    30  		out  analysis.TokenStream
    31  	}{
    32  		{lang: "en",
    33  			in: analysis.TokenStream{
    34  				&analysis.Token{Term: []byte("the")},
    35  				&analysis.Token{Term: []byte("quick")},
    36  				&analysis.Token{Term: []byte("brown")},
    37  				&analysis.Token{Term: []byte("foxes")},
    38  				&analysis.Token{Term: []byte("jump")},
    39  				&analysis.Token{Term: []byte("over")},
    40  				&analysis.Token{Term: []byte("the")},
    41  				&analysis.Token{Term: []byte("big")},
    42  				&analysis.Token{Term: []byte("dogs")},
    43  			},
    44  			out: analysis.TokenStream{
    45  				&analysis.Token{Term: []byte("the")},
    46  				&analysis.Token{Term: []byte("quick")},
    47  				&analysis.Token{Term: []byte("brown")},
    48  				&analysis.Token{Term: []byte("fox")},
    49  				&analysis.Token{Term: []byte("jump")},
    50  				&analysis.Token{Term: []byte("over")},
    51  				&analysis.Token{Term: []byte("the")},
    52  				&analysis.Token{Term: []byte("big")},
    53  				&analysis.Token{Term: []byte("dog")},
    54  			},
    55  		},
    56  		{lang: "es",
    57  			in: analysis.TokenStream{
    58  				&analysis.Token{Term: []byte("deseándoles")},
    59  				&analysis.Token{Term: []byte("muchas")},
    60  				&analysis.Token{Term: []byte("alegrías")},
    61  				&analysis.Token{Term: []byte("a")},
    62  				&analysis.Token{Term: []byte("las")},
    63  				&analysis.Token{Term: []byte("señoritas")},
    64  				&analysis.Token{Term: []byte("y")},
    65  				&analysis.Token{Term: []byte("los")},
    66  				&analysis.Token{Term: []byte("señores")},
    67  				&analysis.Token{Term: []byte("programadores")},
    68  				&analysis.Token{Term: []byte("de")},
    69  				&analysis.Token{Term: []byte("Dgraph")},
    70  			},
    71  			out: analysis.TokenStream{
    72  				&analysis.Token{Term: []byte("deseandol")},
    73  				&analysis.Token{Term: []byte("much")},
    74  				&analysis.Token{Term: []byte("alegri")},
    75  				&analysis.Token{Term: []byte("a")},
    76  				&analysis.Token{Term: []byte("las")},
    77  				&analysis.Token{Term: []byte("señorit")},
    78  				&analysis.Token{Term: []byte("y")},
    79  				&analysis.Token{Term: []byte("los")},
    80  				&analysis.Token{Term: []byte("señor")},
    81  				&analysis.Token{Term: []byte("programador")},
    82  				&analysis.Token{Term: []byte("de")},
    83  				&analysis.Token{Term: []byte("Dgraph")},
    84  			},
    85  		},
    86  		{lang: "x-klingon",
    87  			in: analysis.TokenStream{
    88  				&analysis.Token{Term: []byte("tlhIngan")},
    89  				&analysis.Token{Term: []byte("maH!")},
    90  			},
    91  			out: analysis.TokenStream{
    92  				&analysis.Token{Term: []byte("tlhIngan")},
    93  				&analysis.Token{Term: []byte("maH!")},
    94  			},
    95  		},
    96  		{lang: "en",
    97  			in:  analysis.TokenStream{},
    98  			out: analysis.TokenStream{},
    99  		},
   100  		{lang: "",
   101  			in: analysis.TokenStream{
   102  				&analysis.Token{
   103  					Term: []byte(""),
   104  				},
   105  			},
   106  			out: analysis.TokenStream{
   107  				&analysis.Token{
   108  					Term: []byte(""),
   109  				},
   110  			},
   111  		},
   112  	}
   113  
   114  	for _, tc := range tests {
   115  		out := filterStemmers(tc.lang, tc.in)
   116  		require.Equal(t, tc.out, out)
   117  	}
   118  }