github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/stopwords/detector_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package stopwords 13 14 import ( 15 "testing" 16 17 "github.com/stretchr/testify/require" 18 "github.com/weaviate/weaviate/entities/models" 19 ) 20 21 func TestStopwordDetector(t *testing.T) { 22 type testcase struct { 23 cfg models.StopwordConfig 24 input []string 25 expectedCountable int 26 } 27 28 runTest := func(t *testing.T, tests []testcase) { 29 for _, test := range tests { 30 sd, err := NewDetectorFromConfig(test.cfg) 31 require.Nil(t, err) 32 33 var result []string 34 for _, word := range test.input { 35 if !sd.IsStopword(word) { 36 result = append(result, word) 37 } 38 } 39 require.Equal(t, test.expectedCountable, len(result)) 40 } 41 } 42 43 t.Run("with en preset, additions", func(t *testing.T) { 44 tests := []testcase{ 45 { 46 cfg: models.StopwordConfig{ 47 Preset: "en", 48 Additions: []string{"dog"}, 49 }, 50 input: []string{"dog", "dog", "dog", "dog"}, 51 expectedCountable: 0, 52 }, 53 { 54 cfg: models.StopwordConfig{ 55 Preset: "en", 56 Additions: []string{"dog"}, 57 }, 58 input: []string{"dog", "dog", "dog", "cat"}, 59 expectedCountable: 1, 60 }, 61 { 62 cfg: models.StopwordConfig{ 63 Preset: "en", 64 Additions: []string{"dog"}, 65 }, 66 input: []string{"a", "dog", "is", "the", "best"}, 67 expectedCountable: 1, 68 }, 69 } 70 71 runTest(t, tests) 72 }) 73 74 t.Run("with no preset, additions", func(t *testing.T) { 75 tests := []testcase{ 76 { 77 cfg: models.StopwordConfig{ 78 Preset: "none", 79 Additions: []string{"dog"}, 80 }, 81 input: []string{"a", "dog", "is", "the", "best"}, 82 expectedCountable: 4, 83 }, 84 } 85 86 runTest(t, tests) 87 }) 88 89 t.Run("with en preset, removals", func(t *testing.T) { 90 tests := []testcase{ 91 { 92 cfg: models.StopwordConfig{ 93 Preset: "en", 94 Removals: []string{"a"}, 95 }, 96 input: []string{"a", "dog", "is", "the", "best"}, 97 expectedCountable: 3, 98 }, 99 { 100 cfg: models.StopwordConfig{ 101 Preset: "en", 102 Removals: []string{"a", "is", "the"}, 103 }, 104 input: []string{"a", "dog", "is", "the", "best"}, 105 expectedCountable: 5, 106 }, 107 } 108 109 runTest(t, tests) 110 }) 111 112 t.Run("with en preset, removals", func(t *testing.T) { 113 tests := []testcase{ 114 { 115 cfg: models.StopwordConfig{ 116 Preset: "en", 117 Removals: []string{"a"}, 118 }, 119 input: []string{"a", "dog", "is", "the", "best"}, 120 expectedCountable: 3, 121 }, 122 { 123 cfg: models.StopwordConfig{ 124 Preset: "en", 125 Removals: []string{"a", "is", "the"}, 126 }, 127 input: []string{"a", "dog", "is", "the", "best"}, 128 expectedCountable: 5, 129 }, 130 } 131 132 runTest(t, tests) 133 }) 134 135 t.Run("with en preset, additions, removals", func(t *testing.T) { 136 tests := []testcase{ 137 { 138 cfg: models.StopwordConfig{ 139 Preset: "en", 140 Additions: []string{"dog"}, 141 Removals: []string{"a"}, 142 }, 143 input: []string{"a", "dog", "is", "the", "best"}, 144 expectedCountable: 2, 145 }, 146 { 147 cfg: models.StopwordConfig{ 148 Preset: "en", 149 Additions: []string{"dog", "best"}, 150 Removals: []string{"a", "the", "is"}, 151 }, 152 input: []string{"a", "dog", "is", "the", "best"}, 153 expectedCountable: 3, 154 }, 155 } 156 157 runTest(t, tests) 158 }) 159 }